1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2015 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "rtl.h"
28 #include "alias.h"
29 #include "symtab.h"
30 #include "tree.h"
31 #include "fold-const.h"
32 #include "print-tree.h"
33 #include "stringpool.h"
34 #include "stor-layout.h"
35 #include "varasm.h"
36 #include "calls.h"
37 #include "tm_p.h"
38 #include "regs.h"
39 #include "hard-reg-set.h"
40 #include "insn-config.h"
41 #include "conditions.h"
42 #include "output.h"
43 #include "insn-attr.h"
44 #include "flags.h"
45 #include "except.h"
46 #include "function.h"
47 #include "recog.h"
48 #include "expmed.h"
49 #include "dojump.h"
50 #include "explow.h"
51 #include "emit-rtl.h"
52 #include "stmt.h"
53 #include "expr.h"
54 #include "reload.h"
55 #include "diagnostic-core.h"
56 #include "predict.h"
57 #include "dominance.h"
58 #include "cfg.h"
59 #include "cfgrtl.h"
60 #include "cfganal.h"
61 #include "lcm.h"
62 #include "cfgbuild.h"
63 #include "cfgcleanup.h"
64 #include "basic-block.h"
65 #include "target.h"
66 #include "target-def.h"
67 #include "debug.h"
68 #include "langhooks.h"
69 #include "insn-codes.h"
70 #include "optabs.h"
71 #include "tree-ssa-alias.h"
72 #include "internal-fn.h"
73 #include "gimple-fold.h"
74 #include "tree-eh.h"
75 #include "gimple-expr.h"
76 #include "gimple.h"
77 #include "gimplify.h"
78 #include "df.h"
79 #include "params.h"
80 #include "cfgloop.h"
81 #include "opts.h"
82 #include "tree-pass.h"
83 #include "context.h"
84 #include "builtins.h"
85 #include "rtl-iter.h"
86 #include "intl.h"
87 #include "plugin-api.h"
88 #include "ipa-ref.h"
89 #include "cgraph.h"
90
91 /* Define the specific costs for a given cpu. */
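/* Note (added for clarity): all entries below are expressed via COSTS_N_INSNS,
   i.e. as multiples of the cost of a single fast instruction, so these tables
   describe relative instruction latencies rather than absolute cycle counts. */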
92
93 struct processor_costs
94 {
95 /* multiplication */
96 const int m; /* cost of an M instruction. */
97 const int mghi; /* cost of an MGHI instruction. */
98 const int mh; /* cost of an MH instruction. */
99 const int mhi; /* cost of an MHI instruction. */
100 const int ml; /* cost of an ML instruction. */
101 const int mr; /* cost of an MR instruction. */
102 const int ms; /* cost of an MS instruction. */
103 const int msg; /* cost of an MSG instruction. */
104 const int msgf; /* cost of an MSGF instruction. */
105 const int msgfr; /* cost of an MSGFR instruction. */
106 const int msgr; /* cost of an MSGR instruction. */
107 const int msr; /* cost of an MSR instruction. */
108 const int mult_df; /* cost of multiplication in DFmode. */
109 const int mxbr; /* cost of an MXBR instruction. */
110 /* square root */
111 const int sqxbr; /* cost of square root in TFmode. */
112 const int sqdbr; /* cost of square root in DFmode. */
113 const int sqebr; /* cost of square root in SFmode. */
114 /* multiply and add */
115 const int madbr; /* cost of multiply and add in DFmode. */
116 const int maebr; /* cost of multiply and add in SFmode. */
117 /* division */
118 const int dxbr; /* cost of a DXBR instruction. */
119 const int ddbr; /* cost of a DDBR instruction. */
120 const int debr; /* cost of a DEBR instruction. */
121 const int dlgr; /* cost of a DLGR instruction. */
122 const int dlr; /* cost of a DLR instruction. */
123 const int dr; /* cost of a DR instruction. */
124 const int dsgfr; /* cost of a DSGFR instruction. */
125 const int dsgr; /* cost of a DSGR instruction. */
126 };
127
128 const struct processor_costs *s390_cost;
129
130 static const
131 struct processor_costs z900_cost =
132 {
133 COSTS_N_INSNS (5), /* M */
134 COSTS_N_INSNS (10), /* MGHI */
135 COSTS_N_INSNS (5), /* MH */
136 COSTS_N_INSNS (4), /* MHI */
137 COSTS_N_INSNS (5), /* ML */
138 COSTS_N_INSNS (5), /* MR */
139 COSTS_N_INSNS (4), /* MS */
140 COSTS_N_INSNS (15), /* MSG */
141 COSTS_N_INSNS (7), /* MSGF */
142 COSTS_N_INSNS (7), /* MSGFR */
143 COSTS_N_INSNS (10), /* MSGR */
144 COSTS_N_INSNS (4), /* MSR */
145 COSTS_N_INSNS (7), /* multiplication in DFmode */
146 COSTS_N_INSNS (13), /* MXBR */
147 COSTS_N_INSNS (136), /* SQXBR */
148 COSTS_N_INSNS (44), /* SQDBR */
149 COSTS_N_INSNS (35), /* SQEBR */
150 COSTS_N_INSNS (18), /* MADBR */
151 COSTS_N_INSNS (13), /* MAEBR */
152 COSTS_N_INSNS (134), /* DXBR */
153 COSTS_N_INSNS (30), /* DDBR */
154 COSTS_N_INSNS (27), /* DEBR */
155 COSTS_N_INSNS (220), /* DLGR */
156 COSTS_N_INSNS (34), /* DLR */
157 COSTS_N_INSNS (34), /* DR */
158 COSTS_N_INSNS (32), /* DSGFR */
159 COSTS_N_INSNS (32), /* DSGR */
160 };
161
162 static const
163 struct processor_costs z990_cost =
164 {
165 COSTS_N_INSNS (4), /* M */
166 COSTS_N_INSNS (2), /* MGHI */
167 COSTS_N_INSNS (2), /* MH */
168 COSTS_N_INSNS (2), /* MHI */
169 COSTS_N_INSNS (4), /* ML */
170 COSTS_N_INSNS (4), /* MR */
171 COSTS_N_INSNS (5), /* MS */
172 COSTS_N_INSNS (6), /* MSG */
173 COSTS_N_INSNS (4), /* MSGF */
174 COSTS_N_INSNS (4), /* MSGFR */
175 COSTS_N_INSNS (4), /* MSGR */
176 COSTS_N_INSNS (4), /* MSR */
177 COSTS_N_INSNS (1), /* multiplication in DFmode */
178 COSTS_N_INSNS (28), /* MXBR */
179 COSTS_N_INSNS (130), /* SQXBR */
180 COSTS_N_INSNS (66), /* SQDBR */
181 COSTS_N_INSNS (38), /* SQEBR */
182 COSTS_N_INSNS (1), /* MADBR */
183 COSTS_N_INSNS (1), /* MAEBR */
184 COSTS_N_INSNS (60), /* DXBR */
185 COSTS_N_INSNS (40), /* DDBR */
186 COSTS_N_INSNS (26), /* DEBR */
187 COSTS_N_INSNS (176), /* DLGR */
188 COSTS_N_INSNS (31), /* DLR */
189 COSTS_N_INSNS (31), /* DR */
190 COSTS_N_INSNS (31), /* DSGFR */
191 COSTS_N_INSNS (31), /* DSGR */
192 };
193
194 static const
195 struct processor_costs z9_109_cost =
196 {
197 COSTS_N_INSNS (4), /* M */
198 COSTS_N_INSNS (2), /* MGHI */
199 COSTS_N_INSNS (2), /* MH */
200 COSTS_N_INSNS (2), /* MHI */
201 COSTS_N_INSNS (4), /* ML */
202 COSTS_N_INSNS (4), /* MR */
203 COSTS_N_INSNS (5), /* MS */
204 COSTS_N_INSNS (6), /* MSG */
205 COSTS_N_INSNS (4), /* MSGF */
206 COSTS_N_INSNS (4), /* MSGFR */
207 COSTS_N_INSNS (4), /* MSGR */
208 COSTS_N_INSNS (4), /* MSR */
209 COSTS_N_INSNS (1), /* multiplication in DFmode */
210 COSTS_N_INSNS (28), /* MXBR */
211 COSTS_N_INSNS (130), /* SQXBR */
212 COSTS_N_INSNS (66), /* SQDBR */
213 COSTS_N_INSNS (38), /* SQEBR */
214 COSTS_N_INSNS (1), /* MADBR */
215 COSTS_N_INSNS (1), /* MAEBR */
216 COSTS_N_INSNS (60), /* DXBR */
217 COSTS_N_INSNS (40), /* DDBR */
218 COSTS_N_INSNS (26), /* DEBR */
219 COSTS_N_INSNS (30), /* DLGR */
220 COSTS_N_INSNS (23), /* DLR */
221 COSTS_N_INSNS (23), /* DR */
222 COSTS_N_INSNS (24), /* DSGFR */
223 COSTS_N_INSNS (24), /* DSGR */
224 };
225
226 static const
227 struct processor_costs z10_cost =
228 {
229 COSTS_N_INSNS (10), /* M */
230 COSTS_N_INSNS (10), /* MGHI */
231 COSTS_N_INSNS (10), /* MH */
232 COSTS_N_INSNS (10), /* MHI */
233 COSTS_N_INSNS (10), /* ML */
234 COSTS_N_INSNS (10), /* MR */
235 COSTS_N_INSNS (10), /* MS */
236 COSTS_N_INSNS (10), /* MSG */
237 COSTS_N_INSNS (10), /* MSGF */
238 COSTS_N_INSNS (10), /* MSGFR */
239 COSTS_N_INSNS (10), /* MSGR */
240 COSTS_N_INSNS (10), /* MSR */
241 COSTS_N_INSNS (1) , /* multiplication in DFmode */
242 COSTS_N_INSNS (50), /* MXBR */
243 COSTS_N_INSNS (120), /* SQXBR */
244 COSTS_N_INSNS (52), /* SQDBR */
245 COSTS_N_INSNS (38), /* SQEBR */
246 COSTS_N_INSNS (1), /* MADBR */
247 COSTS_N_INSNS (1), /* MAEBR */
248 COSTS_N_INSNS (111), /* DXBR */
249 COSTS_N_INSNS (39), /* DDBR */
250 COSTS_N_INSNS (32), /* DEBR */
251 COSTS_N_INSNS (160), /* DLGR */
252 COSTS_N_INSNS (71), /* DLR */
253 COSTS_N_INSNS (71), /* DR */
254 COSTS_N_INSNS (71), /* DSGFR */
255 COSTS_N_INSNS (71), /* DSGR */
256 };
257
258 static const
259 struct processor_costs z196_cost =
260 {
261 COSTS_N_INSNS (7), /* M */
262 COSTS_N_INSNS (5), /* MGHI */
263 COSTS_N_INSNS (5), /* MH */
264 COSTS_N_INSNS (5), /* MHI */
265 COSTS_N_INSNS (7), /* ML */
266 COSTS_N_INSNS (7), /* MR */
267 COSTS_N_INSNS (6), /* MS */
268 COSTS_N_INSNS (8), /* MSG */
269 COSTS_N_INSNS (6), /* MSGF */
270 COSTS_N_INSNS (6), /* MSGFR */
271 COSTS_N_INSNS (8), /* MSGR */
272 COSTS_N_INSNS (6), /* MSR */
273 COSTS_N_INSNS (1) , /* multiplication in DFmode */
274 COSTS_N_INSNS (40), /* MXBR B+40 */
275 COSTS_N_INSNS (100), /* SQXBR B+100 */
276 COSTS_N_INSNS (42), /* SQDBR B+42 */
277 COSTS_N_INSNS (28), /* SQEBR B+28 */
278 COSTS_N_INSNS (1), /* MADBR B */
279 COSTS_N_INSNS (1), /* MAEBR B */
280 COSTS_N_INSNS (101), /* DXBR B+101 */
281 COSTS_N_INSNS (29), /* DDBR */
282 COSTS_N_INSNS (22), /* DEBR */
283 COSTS_N_INSNS (160), /* DLGR cracked */
284 COSTS_N_INSNS (160), /* DLR cracked */
285 COSTS_N_INSNS (160), /* DR expanded */
286 COSTS_N_INSNS (160), /* DSGFR cracked */
287 COSTS_N_INSNS (160), /* DSGR cracked */
288 };
289
290 static const
291 struct processor_costs zEC12_cost =
292 {
293 COSTS_N_INSNS (7), /* M */
294 COSTS_N_INSNS (5), /* MGHI */
295 COSTS_N_INSNS (5), /* MH */
296 COSTS_N_INSNS (5), /* MHI */
297 COSTS_N_INSNS (7), /* ML */
298 COSTS_N_INSNS (7), /* MR */
299 COSTS_N_INSNS (6), /* MS */
300 COSTS_N_INSNS (8), /* MSG */
301 COSTS_N_INSNS (6), /* MSGF */
302 COSTS_N_INSNS (6), /* MSGFR */
303 COSTS_N_INSNS (8), /* MSGR */
304 COSTS_N_INSNS (6), /* MSR */
305 COSTS_N_INSNS (1) , /* multiplication in DFmode */
306 COSTS_N_INSNS (40), /* MXBR B+40 */
307 COSTS_N_INSNS (100), /* SQXBR B+100 */
308 COSTS_N_INSNS (42), /* SQDBR B+42 */
309 COSTS_N_INSNS (28), /* SQEBR B+28 */
310 COSTS_N_INSNS (1), /* MADBR B */
311 COSTS_N_INSNS (1), /* MAEBR B */
312 COSTS_N_INSNS (131), /* DXBR B+131 */
313 COSTS_N_INSNS (29), /* DDBR */
314 COSTS_N_INSNS (22), /* DEBR */
315 COSTS_N_INSNS (160), /* DLGR cracked */
316 COSTS_N_INSNS (160), /* DLR cracked */
317 COSTS_N_INSNS (160), /* DR expanded */
318 COSTS_N_INSNS (160), /* DSGFR cracked */
319 COSTS_N_INSNS (160), /* DSGR cracked */
320 };
321
322 extern int reload_completed;
323
324 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
325 static rtx_insn *last_scheduled_insn;
326
327 /* Structure used to hold the components of a S/390 memory
328 address. A legitimate address on S/390 is of the general
329 form
330 base + index + displacement
331 where any of the components is optional.
332
333 base and index are registers of the class ADDR_REGS,
334 displacement is an unsigned 12-bit immediate constant. */
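/* For example, the assembler operand 4092(%r2,%r13) - D(X,B) syntax -
   corresponds to disp = (const_int 4092), indx = %r2 and base = %r13. */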
335
336 struct s390_address
337 {
338 rtx base;
339 rtx indx;
340 rtx disp;
341 bool pointer;
342 bool literal_pool;
343 };
344
345 /* The following structure is embedded in the machine
346 specific part of struct function. */
347
348 struct GTY (()) s390_frame_layout
349 {
350 /* Offset within stack frame. */
351 HOST_WIDE_INT gprs_offset;
352 HOST_WIDE_INT f0_offset;
353 HOST_WIDE_INT f4_offset;
354 HOST_WIDE_INT f8_offset;
355 HOST_WIDE_INT backchain_offset;
356
357 /* Number of the first and last gpr for which slots in the register
358 save area are reserved. */
359 int first_save_gpr_slot;
360 int last_save_gpr_slot;
361
362 /* Location (FP register number) where GPRs (r0-r15) should
363 be saved to.
364 0 - does not need to be saved at all
365 -1 - stack slot */
366 signed char gpr_save_slots[16];
367
368 /* Number of first and last gpr to be saved, restored. */
369 int first_save_gpr;
370 int first_restore_gpr;
371 int last_save_gpr;
372 int last_restore_gpr;
373
374 /* Bits standing for floating point registers. Set, if the
375 respective register has to be saved. Starting with reg 16 (f0)
376 at the rightmost bit.
377 Bit 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
378 fpr 15 13 11 9 14 12 10 8 7 5 3 1 6 4 2 0
379 reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 */
380 unsigned int fpr_bitmap;
381
382 /* Number of floating point registers f8-f15 which must be saved. */
383 int high_fprs;
384
385 /* Set if return address needs to be saved.
386 This flag is set by s390_return_addr_rtx if it could not use
387 the initial value of r14 and therefore depends on r14 saved
388 to the stack. */
389 bool save_return_addr_p;
390
391 /* Size of stack frame. */
392 HOST_WIDE_INT frame_size;
393 };
394
395 /* Define the structure for the machine field in struct function. */
396
397 struct GTY(()) machine_function
398 {
399 struct s390_frame_layout frame_layout;
400
401 /* Literal pool base register. */
402 rtx base_reg;
403
404 /* True if we may need to perform branch splitting. */
405 bool split_branches_pending_p;
406
407 bool has_landing_pad_p;
408
409 /* True if the current function may contain a tbegin clobbering
410 FPRs. */
411 bool tbegin_p;
412 };
413
414 /* A few accessor macros for struct cfun->machine->s390_frame_layout. */
415
416 #define cfun_frame_layout (cfun->machine->frame_layout)
417 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
418 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
419 ? cfun_frame_layout.fpr_bitmap & 0x0f \
420 : cfun_frame_layout.fpr_bitmap & 0x03))
421 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
422 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
423 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
424 (1 << (REGNO - FPR0_REGNUM)))
425 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
426 (1 << (REGNO - FPR0_REGNUM))))
427 #define cfun_gpr_save_slot(REGNO) \
428 cfun->machine->frame_layout.gpr_save_slots[REGNO]
429
430 /* Number of GPRs and FPRs used for argument passing. */
431 #define GP_ARG_NUM_REG 5
432 #define FP_ARG_NUM_REG (TARGET_64BIT ? 4 : 2)
433 #define VEC_ARG_NUM_REG 8
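/* On the z/Architecture ELF ABIs these correspond to GPRs r2-r6, FPRs
   f0/f2/f4/f6 (f0/f2 in 31-bit mode) and, with vector support, VRs
   v24-v31 being used for argument passing. */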
434
435 /* A couple of shortcuts. */
436 #define CONST_OK_FOR_J(x) \
437 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
438 #define CONST_OK_FOR_K(x) \
439 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
440 #define CONST_OK_FOR_Os(x) \
441 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
442 #define CONST_OK_FOR_Op(x) \
443 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
444 #define CONST_OK_FOR_On(x) \
445 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
446
447 #define REGNO_PAIR_OK(REGNO, MODE) \
448 (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
449
450 /* That's the read ahead of the dynamic branch prediction unit in
451 bytes on a z10 (or higher) CPU. */
452 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
453
454
455 /* System z builtins. */
456
457 #include "s390-builtins.h"
458
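/* The tables below are built by re-including s390-builtins.def several times
   with the B_DEF/OB_DEF/OB_DEF_VAR macros redefined to expand to just the
   field of interest (an X-macro pattern), so they automatically stay in sync
   with the builtin definitions. */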
459 const unsigned int flags_builtin[S390_BUILTIN_MAX + 1] =
460 {
461 #undef B_DEF
462 #undef OB_DEF
463 #undef OB_DEF_VAR
464 #define B_DEF(NAME, PATTERN, ATTRS, FLAGS, FNTYPE) FLAGS,
465 #define OB_DEF(...)
466 #define OB_DEF_VAR(...)
467 #include "s390-builtins.def"
468 0
469 };
470
471 const unsigned int flags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
472 {
473 #undef B_DEF
474 #undef OB_DEF
475 #undef OB_DEF_VAR
476 #define B_DEF(...)
477 #define OB_DEF(...)
478 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, FNTYPE) FLAGS,
479 #include "s390-builtins.def"
480 0
481 };
482
483 tree s390_builtin_types[BT_MAX];
484 tree s390_builtin_fn_types[BT_FN_MAX];
485 tree s390_builtin_decls[S390_BUILTIN_MAX +
486 S390_OVERLOADED_BUILTIN_MAX +
487 S390_OVERLOADED_BUILTIN_VAR_MAX];
488
489 static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
490 #undef B_DEF
491 #undef OB_DEF
492 #undef OB_DEF_VAR
493 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
494 #define OB_DEF(...)
495 #define OB_DEF_VAR(...)
496
497 #include "s390-builtins.def"
498 CODE_FOR_nothing
499 };
500
501 static void
502 s390_init_builtins (void)
503 {
504 /* These definitions are being used in s390-builtins.def. */
505 tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
506 NULL, NULL);
507 tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
508 tree c_uint64_type_node;
509
510 /* The uint64_type_node from tree.c is not compatible with the C99
511 uint64_t data type. What we want is c_uint64_type_node from
512 c-common.c. But since backend code is not supposed to interface
513 with the frontend we recreate it here. */
514 if (TARGET_64BIT)
515 c_uint64_type_node = long_unsigned_type_node;
516 else
517 c_uint64_type_node = long_long_unsigned_type_node;
518
519 #undef DEF_TYPE
520 #define DEF_TYPE(INDEX, NODE, CONST_P) \
521 s390_builtin_types[INDEX] = (!CONST_P) ? \
522 (NODE) : build_type_variant ((NODE), 1, 0);
523
524 #undef DEF_POINTER_TYPE
525 #define DEF_POINTER_TYPE(INDEX, INDEX_BASE) \
526 s390_builtin_types[INDEX] = \
527 build_pointer_type (s390_builtin_types[INDEX_BASE]);
528
529 #undef DEF_DISTINCT_TYPE
530 #define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE) \
531 s390_builtin_types[INDEX] = \
532 build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
533
534 #undef DEF_VECTOR_TYPE
535 #define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
536 s390_builtin_types[INDEX] = \
537 build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
538
539 #undef DEF_OPAQUE_VECTOR_TYPE
540 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
541 s390_builtin_types[INDEX] = \
542 build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
543
544 #undef DEF_FN_TYPE
545 #define DEF_FN_TYPE(INDEX, args...) \
546 s390_builtin_fn_types[INDEX] = \
547 build_function_type_list (args, NULL_TREE);
548 #undef DEF_OV_TYPE
549 #define DEF_OV_TYPE(...)
550 #include "s390-builtin-types.def"
551
552 #undef B_DEF
553 #define B_DEF(NAME, PATTERN, ATTRS, FLAGS, FNTYPE) \
554 s390_builtin_decls[S390_BUILTIN_##NAME] = \
555 add_builtin_function ("__builtin_" #NAME, \
556 s390_builtin_fn_types[FNTYPE], \
557 S390_BUILTIN_##NAME, \
558 BUILT_IN_MD, \
559 NULL, \
560 ATTRS);
561 #undef OB_DEF
562 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, FNTYPE) \
563 s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
564 add_builtin_function ("__builtin_" #NAME, \
565 s390_builtin_fn_types[FNTYPE], \
566 S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
567 BUILT_IN_MD, \
568 NULL, \
569 0);
570 #undef OB_DEF_VAR
571 #define OB_DEF_VAR(...)
572 #include "s390-builtins.def"
573
574 }
575
576 /* Return true if ARG is appropriate as argument number ARGNUM of
577 builtin DECL. The operand flags from s390-builtins.def have to be
578 passed as OP_FLAGS. */
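/* For example, assuming the O_Un / O_Sn enumerators follow the order of the
   bitwidth tables below, an O_U4 operand must be an integer constant in the
   range 0..15 and an O_S8 operand one in the range -128..127. */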
579 bool
580 s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
581 {
582 if (O_UIMM_P (op_flags))
583 {
584 int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
585 int bitwidth = bitwidths[op_flags - O_U1];
586
587 if (!tree_fits_uhwi_p (arg)
588 || tree_to_uhwi (arg) > ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1)
589 {
590 error("constant argument %d for builtin %qF is out of range (0.."
591 HOST_WIDE_INT_PRINT_UNSIGNED ")",
592 argnum, decl,
593 ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1);
594 return false;
595 }
596 }
597
598 if (O_SIMM_P (op_flags))
599 {
600 int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
601 int bitwidth = bitwidths[op_flags - O_S2];
602
603 if (!tree_fits_shwi_p (arg)
604 || tree_to_shwi (arg) < -((HOST_WIDE_INT)1 << (bitwidth - 1))
605 || tree_to_shwi (arg) > (((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1))
606 {
607 error("constant argument %d for builtin %qF is out of range ("
608 HOST_WIDE_INT_PRINT_DEC ".."
609 HOST_WIDE_INT_PRINT_DEC ")",
610 argnum, decl,
611 -(HOST_WIDE_INT)1 << (bitwidth - 1),
612 ((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1);
613 return false;
614 }
615 }
616 return true;
617 }
618
619 /* Expand an expression EXP that calls a built-in function,
620 with result going to TARGET if that's convenient
621 (and in mode MODE if that's convenient).
622 SUBTARGET may be used as the target for computing one of EXP's operands.
623 IGNORE is nonzero if the value is to be ignored. */
624
625 static rtx
626 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
627 machine_mode mode ATTRIBUTE_UNUSED,
628 int ignore ATTRIBUTE_UNUSED)
629 {
630 #define MAX_ARGS 5
631
632 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
633 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
634 enum insn_code icode;
635 rtx op[MAX_ARGS], pat;
636 int arity;
637 bool nonvoid;
638 tree arg;
639 call_expr_arg_iterator iter;
640 unsigned int all_op_flags = flags_for_builtin (fcode);
641 machine_mode last_vec_mode = VOIDmode;
642
643 if (TARGET_DEBUG_ARG)
644 {
645 fprintf (stderr,
646 "s390_expand_builtin, code = %4d, %s\n",
647 (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)));
648 }
649
650
651 if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
652 && fcode < S390_ALL_BUILTIN_MAX)
653 {
654 gcc_unreachable ();
655 }
656 else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
657 {
658 icode = code_for_builtin[fcode];
659 /* Set a flag in the machine specific cfun part in order to support
660 saving/restoring of FPRs. */
661 if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
662 cfun->machine->tbegin_p = true;
663 }
664 else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
665 {
666 error ("Unresolved overloaded builtin");
667 return const0_rtx;
668 }
669 else
670 internal_error ("bad builtin fcode");
671
672 if (icode == 0)
673 internal_error ("bad builtin icode");
674
675 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
676
677 if (nonvoid)
678 {
679 machine_mode tmode = insn_data[icode].operand[0].mode;
680 if (!target
681 || GET_MODE (target) != tmode
682 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
683 target = gen_reg_rtx (tmode);
684
685 /* There are builtins (e.g. vec_promote) with no vector
686 arguments but an element selector. So we have to also look
687 at the vector return type when emitting the modulo
688 operation. */
689 if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
690 last_vec_mode = insn_data[icode].operand[0].mode;
691 }
692
693 arity = 0;
694 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
695 {
696 const struct insn_operand_data *insn_op;
697 unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
698
699 all_op_flags = all_op_flags >> O_SHIFT;
700
701 if (arg == error_mark_node)
702 return NULL_RTX;
703 if (arity >= MAX_ARGS)
704 return NULL_RTX;
705
706 if (O_IMM_P (op_flags)
707 && TREE_CODE (arg) != INTEGER_CST)
708 {
709 error ("constant value required for builtin %qF argument %d",
710 fndecl, arity + 1);
711 return const0_rtx;
712 }
713
714 if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
715 return const0_rtx;
716
717 insn_op = &insn_data[icode].operand[arity + nonvoid];
718 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
719
720 /* Wrap the expanded RTX for pointer types into a MEM expr with
721 the proper mode. This allows us to use e.g. (match_operand
722 "memory_operand"..) in the insn patterns instead of (mem
723 (match_operand "address_operand")). This is helpful for
724 patterns not just accepting MEMs. */
725 if (POINTER_TYPE_P (TREE_TYPE (arg))
726 && insn_op->predicate != address_operand)
727 op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
728
729 /* Expand the modulo operation required on element selectors. */
730 if (op_flags == O_ELEM)
731 {
732 gcc_assert (last_vec_mode != VOIDmode);
733 op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
734 op[arity],
735 GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
736 NULL_RTX, 1, OPTAB_DIRECT);
737 }
738
739 /* Record the vector mode used for an element selector. This assumes:
740 1. There is no builtin with two different vector modes and an element selector
741 2. The element selector comes after the vector type it is referring to.
742 This is currently true for all the builtins but FIXME we
743 should check for that. */
744 if (VECTOR_MODE_P (insn_op->mode))
745 last_vec_mode = insn_op->mode;
746
747 if (insn_op->predicate (op[arity], insn_op->mode))
748 {
749 arity++;
750 continue;
751 }
752
753 if (MEM_P (op[arity])
754 && insn_op->predicate == memory_operand
755 && (GET_MODE (XEXP (op[arity], 0)) == Pmode
756 || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
757 {
758 op[arity] = replace_equiv_address (op[arity],
759 copy_to_mode_reg (Pmode,
760 XEXP (op[arity], 0)));
761 }
762 else if (GET_MODE (op[arity]) == insn_op->mode
763 || GET_MODE (op[arity]) == VOIDmode
764 || (insn_op->predicate == address_operand
765 && GET_MODE (op[arity]) == Pmode))
766 {
767 /* An address_operand usually has VOIDmode in the expander
768 so we cannot use this. */
769 machine_mode target_mode =
770 (insn_op->predicate == address_operand
771 ? Pmode : insn_op->mode);
772 op[arity] = copy_to_mode_reg (target_mode, op[arity]);
773 }
774
775 if (!insn_op->predicate (op[arity], insn_op->mode))
776 {
777 error ("Invalid argument %d for builtin %qF", arity + 1, fndecl);
778 return const0_rtx;
779 }
780 arity++;
781 }
782
783 if (last_vec_mode != VOIDmode && !TARGET_VX)
784 {
785 error ("Vector type builtin %qF is not supported without -mvx "
786 "(default with -march=z13).",
787 fndecl);
788 return const0_rtx;
789 }
790
791 switch (arity)
792 {
793 case 0:
794 pat = GEN_FCN (icode) (target);
795 break;
796 case 1:
797 if (nonvoid)
798 pat = GEN_FCN (icode) (target, op[0]);
799 else
800 pat = GEN_FCN (icode) (op[0]);
801 break;
802 case 2:
803 if (nonvoid)
804 pat = GEN_FCN (icode) (target, op[0], op[1]);
805 else
806 pat = GEN_FCN (icode) (op[0], op[1]);
807 break;
808 case 3:
809 if (nonvoid)
810 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
811 else
812 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
813 break;
814 case 4:
815 if (nonvoid)
816 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
817 else
818 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
819 break;
820 case 5:
821 if (nonvoid)
822 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
823 else
824 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
825 break;
826 case 6:
827 if (nonvoid)
828 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
829 else
830 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
831 break;
832 default:
833 gcc_unreachable ();
834 }
835 if (!pat)
836 return NULL_RTX;
837 emit_insn (pat);
838
839 if (nonvoid)
840 return target;
841 else
842 return const0_rtx;
843 }
844
845
846 static const int s390_hotpatch_hw_max = 1000000;
847 static int s390_hotpatch_hw_before_label = 0;
848 static int s390_hotpatch_hw_after_label = 0;
849
850 /* Check whether the hotpatch attribute is applied to a function and, if it
851 has arguments, whether the arguments are valid. */
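/* For example:
     void foo (void) __attribute__ ((hotpatch (1, 2)));
   requests 1 halfword of padding before and 2 halfwords after the function
   label. */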
852
853 static tree
854 s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
855 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
856 {
857 tree expr;
858 tree expr2;
859 int err;
860
861 if (TREE_CODE (*node) != FUNCTION_DECL)
862 {
863 warning (OPT_Wattributes, "%qE attribute only applies to functions",
864 name);
865 *no_add_attrs = true;
866 }
867 if (args != NULL && TREE_CHAIN (args) != NULL)
868 {
869 expr = TREE_VALUE (args);
870 expr2 = TREE_VALUE (TREE_CHAIN (args));
871 }
872 if (args == NULL || TREE_CHAIN (args) == NULL)
873 err = 1;
874 else if (TREE_CODE (expr) != INTEGER_CST
875 || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
876 || wi::gtu_p (expr, s390_hotpatch_hw_max))
877 err = 1;
878 else if (TREE_CODE (expr2) != INTEGER_CST
879 || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
880 || wi::gtu_p (expr2, s390_hotpatch_hw_max))
881 err = 1;
882 else
883 err = 0;
884 if (err)
885 {
886 error ("requested %qE attribute is not a comma separated pair of"
887 " non-negative integer constants or too large (max. %d)", name,
888 s390_hotpatch_hw_max);
889 *no_add_attrs = true;
890 }
891
892 return NULL_TREE;
893 }
894
895 /* Expand the s390_vector_bool type attribute. */
896
897 static tree
898 s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
899 tree args ATTRIBUTE_UNUSED,
900 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
901 {
902 tree type = *node, result = NULL_TREE;
903 machine_mode mode;
904
905 while (POINTER_TYPE_P (type)
906 || TREE_CODE (type) == FUNCTION_TYPE
907 || TREE_CODE (type) == METHOD_TYPE
908 || TREE_CODE (type) == ARRAY_TYPE)
909 type = TREE_TYPE (type);
910
911 mode = TYPE_MODE (type);
912 switch (mode)
913 {
914 case DImode: case V2DImode: result = s390_builtin_types[BT_BV2DI]; break;
915 case SImode: case V4SImode: result = s390_builtin_types[BT_BV4SI]; break;
916 case HImode: case V8HImode: result = s390_builtin_types[BT_BV8HI]; break;
917 case QImode: case V16QImode: result = s390_builtin_types[BT_BV16QI]; break;
918 default: break;
919 }
920
921 *no_add_attrs = true; /* No need to hang on to the attribute. */
922
923 if (result)
924 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
925
926 return NULL_TREE;
927 }
928
929 static const struct attribute_spec s390_attribute_table[] = {
930 { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false },
931 { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true },
932 /* End element. */
933 { NULL, 0, 0, false, false, false, NULL, false }
934 };
935
936 /* Return the alignment for LABEL. We default to the -falign-labels
937 value except for the literal pool base label. */
938 int
939 s390_label_align (rtx label)
940 {
941 rtx_insn *prev_insn = prev_active_insn (label);
942 rtx set, src;
943
944 if (prev_insn == NULL_RTX)
945 goto old;
946
947 set = single_set (prev_insn);
948
949 if (set == NULL_RTX)
950 goto old;
951
952 src = SET_SRC (set);
953
954 /* Don't align literal pool base labels. */
955 if (GET_CODE (src) == UNSPEC
956 && XINT (src, 1) == UNSPEC_MAIN_BASE)
957 return 0;
958
959 old:
960 return align_labels_log;
961 }
962
963 static machine_mode
964 s390_libgcc_cmp_return_mode (void)
965 {
966 return TARGET_64BIT ? DImode : SImode;
967 }
968
969 static machine_mode
970 s390_libgcc_shift_count_mode (void)
971 {
972 return TARGET_64BIT ? DImode : SImode;
973 }
974
975 static machine_mode
976 s390_unwind_word_mode (void)
977 {
978 return TARGET_64BIT ? DImode : SImode;
979 }
980
981 /* Return true if the back end supports mode MODE. */
982 static bool
983 s390_scalar_mode_supported_p (machine_mode mode)
984 {
985 /* In contrast to the default implementation, reject TImode constants on
986 31-bit TARGET_ZARCH for ABI compliance. */
987 if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
988 return false;
989
990 if (DECIMAL_FLOAT_MODE_P (mode))
991 return default_decimal_float_supported_p ();
992
993 return default_scalar_mode_supported_p (mode);
994 }
995
996 /* Return true if the back end supports vector mode MODE. */
997 static bool
998 s390_vector_mode_supported_p (machine_mode mode)
999 {
1000 machine_mode inner;
1001
1002 if (!VECTOR_MODE_P (mode)
1003 || !TARGET_VX
1004 || GET_MODE_SIZE (mode) > 16)
1005 return false;
1006
1007 inner = GET_MODE_INNER (mode);
1008
1009 switch (inner)
1010 {
1011 case QImode:
1012 case HImode:
1013 case SImode:
1014 case DImode:
1015 case TImode:
1016 case SFmode:
1017 case DFmode:
1018 case TFmode:
1019 return true;
1020 default:
1021 return false;
1022 }
1023 }
1024
1025 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
1026
1027 void
1028 s390_set_has_landing_pad_p (bool value)
1029 {
1030 cfun->machine->has_landing_pad_p = value;
1031 }
1032
1033 /* If two condition code modes are compatible, return a condition code
1034 mode which is compatible with both. Otherwise, return
1035 VOIDmode. */
1036
1037 static machine_mode
1038 s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
1039 {
1040 if (m1 == m2)
1041 return m1;
1042
1043 switch (m1)
1044 {
1045 case CCZmode:
1046 if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
1047 || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
1048 return m2;
1049 return VOIDmode;
1050
1051 case CCSmode:
1052 case CCUmode:
1053 case CCTmode:
1054 case CCSRmode:
1055 case CCURmode:
1056 case CCZ1mode:
1057 if (m2 == CCZmode)
1058 return m1;
1059
1060 return VOIDmode;
1061
1062 default:
1063 return VOIDmode;
1064 }
1065 return VOIDmode;
1066 }
1067
1068 /* Return true if SET either doesn't set the CC register, or else
1069 the source and destination have matching CC modes and that
1070 CC mode is at least as constrained as REQ_MODE. */
1071
1072 static bool
1073 s390_match_ccmode_set (rtx set, machine_mode req_mode)
1074 {
1075 machine_mode set_mode;
1076
1077 gcc_assert (GET_CODE (set) == SET);
1078
1079 if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
1080 return 1;
1081
1082 set_mode = GET_MODE (SET_DEST (set));
1083 switch (set_mode)
1084 {
1085 case CCSmode:
1086 case CCSRmode:
1087 case CCUmode:
1088 case CCURmode:
1089 case CCLmode:
1090 case CCL1mode:
1091 case CCL2mode:
1092 case CCL3mode:
1093 case CCT1mode:
1094 case CCT2mode:
1095 case CCT3mode:
1096 case CCVEQmode:
1097 case CCVHmode:
1098 case CCVHUmode:
1099 case CCVFHmode:
1100 case CCVFHEmode:
1101 if (req_mode != set_mode)
1102 return 0;
1103 break;
1104
1105 case CCZmode:
1106 if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
1107 && req_mode != CCSRmode && req_mode != CCURmode)
1108 return 0;
1109 break;
1110
1111 case CCAPmode:
1112 case CCANmode:
1113 if (req_mode != CCAmode)
1114 return 0;
1115 break;
1116
1117 default:
1118 gcc_unreachable ();
1119 }
1120
1121 return (GET_MODE (SET_SRC (set)) == set_mode);
1122 }
1123
1124 /* Return true if every SET in INSN that sets the CC register
1125 has source and destination with matching CC modes and that
1126 CC mode is at least as constrained as REQ_MODE.
1127 If REQ_MODE is VOIDmode, always return false. */
1128
1129 bool
1130 s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
1131 {
1132 int i;
1133
1134 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
1135 if (req_mode == VOIDmode)
1136 return false;
1137
1138 if (GET_CODE (PATTERN (insn)) == SET)
1139 return s390_match_ccmode_set (PATTERN (insn), req_mode);
1140
1141 if (GET_CODE (PATTERN (insn)) == PARALLEL)
1142 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1143 {
1144 rtx set = XVECEXP (PATTERN (insn), 0, i);
1145 if (GET_CODE (set) == SET)
1146 if (!s390_match_ccmode_set (set, req_mode))
1147 return false;
1148 }
1149
1150 return true;
1151 }
1152
1153 /* If a test-under-mask instruction can be used to implement
1154 (compare (and ... OP1) OP2), return the CC mode required
1155 to do that. Otherwise, return VOIDmode.
1156 MIXED is true if the instruction can distinguish between
1157 CC1 and CC2 for mixed selected bits (TMxx); it is false
1158 if the instruction cannot (TM). */
1159
1160 machine_mode
1161 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
1162 {
1163 int bit0, bit1;
1164
1165 /* ??? Fixme: should work on CONST_DOUBLE as well. */
1166 if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
1167 return VOIDmode;
1168
1169 /* Selected bits all zero: CC0.
1170 e.g.: int a; if ((a & (16 + 128)) == 0) */
1171 if (INTVAL (op2) == 0)
1172 return CCTmode;
1173
1174 /* Selected bits all one: CC3.
1175 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1176 if (INTVAL (op2) == INTVAL (op1))
1177 return CCT3mode;
1178
1179 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1180 int a;
1181 if ((a & (16 + 128)) == 16) -> CCT1
1182 if ((a & (16 + 128)) == 128) -> CCT2 */
1183 if (mixed)
1184 {
1185 bit1 = exact_log2 (INTVAL (op2));
1186 bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
1187 if (bit0 != -1 && bit1 != -1)
1188 return bit0 > bit1 ? CCT1mode : CCT2mode;
1189 }
1190
1191 return VOIDmode;
1192 }
1193
1194 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1195 OP0 and OP1 of a COMPARE, return the mode to be used for the
1196 comparison. */
1197
1198 machine_mode
1199 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
1200 {
1201 if (TARGET_VX
1202 && register_operand (op0, DFmode)
1203 && register_operand (op1, DFmode))
1204 {
1205 /* LT, LE, UNGT, UNGE require swapping OP0 and OP1. Either
1206 s390_emit_compare or s390_canonicalize_comparison will take
1207 care of it. */
1208 switch (code)
1209 {
1210 case EQ:
1211 case NE:
1212 return CCVEQmode;
1213 case GT:
1214 case UNLE:
1215 return CCVFHmode;
1216 case GE:
1217 case UNLT:
1218 return CCVFHEmode;
1219 default:
1220 ;
1221 }
1222 }
1223
1224 switch (code)
1225 {
1226 case EQ:
1227 case NE:
1228 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1229 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1230 return CCAPmode;
1231 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1232 && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
1233 return CCAPmode;
1234 if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
1235 || GET_CODE (op1) == NEG)
1236 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1237 return CCLmode;
1238
1239 if (GET_CODE (op0) == AND)
1240 {
1241 /* Check whether we can potentially do it via TM. */
1242 machine_mode ccmode;
1243 ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
1244 if (ccmode != VOIDmode)
1245 {
1246 /* Relax CCTmode to CCZmode to allow fall-back to AND
1247 if that turns out to be beneficial. */
1248 return ccmode == CCTmode ? CCZmode : ccmode;
1249 }
1250 }
1251
1252 if (register_operand (op0, HImode)
1253 && GET_CODE (op1) == CONST_INT
1254 && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
1255 return CCT3mode;
1256 if (register_operand (op0, QImode)
1257 && GET_CODE (op1) == CONST_INT
1258 && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
1259 return CCT3mode;
1260
1261 return CCZmode;
1262
1263 case LE:
1264 case LT:
1265 case GE:
1266 case GT:
1267 /* The only overflow condition of NEG and ABS happens when
1268 INT_MIN is used as the operand: the mathematically positive
1269 result wraps around and stays negative.
1270 Using CCAP mode the resulting cc can be used for comparisons. */
1271 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1272 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1273 return CCAPmode;
1274
1275 /* If a constant is involved in an add instruction it is possible to use
1276 the resulting cc for comparisons with zero, since knowing the sign of
1277 the constant makes the overflow behavior predictable. e.g.:
1278 int a, b; if ((b = a + c) > 0)
1279 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
1280 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1281 && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1282 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1283 /* Avoid INT32_MIN on 32 bit. */
1284 && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1285 {
1286 if (INTVAL (XEXP((op0), 1)) < 0)
1287 return CCANmode;
1288 else
1289 return CCAPmode;
1290 }
1291 /* Fall through. */
1292 case UNORDERED:
1293 case ORDERED:
1294 case UNEQ:
1295 case UNLE:
1296 case UNLT:
1297 case UNGE:
1298 case UNGT:
1299 case LTGT:
1300 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1301 && GET_CODE (op1) != CONST_INT)
1302 return CCSRmode;
1303 return CCSmode;
1304
1305 case LTU:
1306 case GEU:
1307 if (GET_CODE (op0) == PLUS
1308 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1309 return CCL1mode;
1310
1311 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1312 && GET_CODE (op1) != CONST_INT)
1313 return CCURmode;
1314 return CCUmode;
1315
1316 case LEU:
1317 case GTU:
1318 if (GET_CODE (op0) == MINUS
1319 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1320 return CCL2mode;
1321
1322 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1323 && GET_CODE (op1) != CONST_INT)
1324 return CCURmode;
1325 return CCUmode;
1326
1327 default:
1328 gcc_unreachable ();
1329 }
1330 }
1331
1332 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1333 that we can implement more efficiently. */
1334
1335 static void
1336 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1337 bool op0_preserve_value)
1338 {
1339 if (op0_preserve_value)
1340 return;
1341
1342 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
1343 if ((*code == EQ || *code == NE)
1344 && *op1 == const0_rtx
1345 && GET_CODE (*op0) == ZERO_EXTRACT
1346 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1347 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1348 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1349 {
1350 rtx inner = XEXP (*op0, 0);
1351 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1352 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1353 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1354
1355 if (len > 0 && len < modesize
1356 && pos >= 0 && pos + len <= modesize
1357 && modesize <= HOST_BITS_PER_WIDE_INT)
1358 {
1359 unsigned HOST_WIDE_INT block;
1360 block = ((unsigned HOST_WIDE_INT) 1 << len) - 1;
1361 block <<= modesize - pos - len;
1362
1363 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1364 gen_int_mode (block, GET_MODE (inner)));
1365 }
1366 }
1367
1368 /* Narrow AND of memory against immediate to enable TM. */
1369 if ((*code == EQ || *code == NE)
1370 && *op1 == const0_rtx
1371 && GET_CODE (*op0) == AND
1372 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1373 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1374 {
1375 rtx inner = XEXP (*op0, 0);
1376 rtx mask = XEXP (*op0, 1);
1377
1378 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
1379 if (GET_CODE (inner) == SUBREG
1380 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1381 && (GET_MODE_SIZE (GET_MODE (inner))
1382 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1383 && ((INTVAL (mask)
1384 & GET_MODE_MASK (GET_MODE (inner))
1385 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1386 == 0))
1387 inner = SUBREG_REG (inner);
1388
1389 /* Do not change volatile MEMs. */
1390 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1391 {
1392 int part = s390_single_part (XEXP (*op0, 1),
1393 GET_MODE (inner), QImode, 0);
1394 if (part >= 0)
1395 {
1396 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1397 inner = adjust_address_nv (inner, QImode, part);
1398 *op0 = gen_rtx_AND (QImode, inner, mask);
1399 }
1400 }
1401 }
1402
1403 /* Narrow comparisons against 0xffff to HImode if possible. */
1404 if ((*code == EQ || *code == NE)
1405 && GET_CODE (*op1) == CONST_INT
1406 && INTVAL (*op1) == 0xffff
1407 && SCALAR_INT_MODE_P (GET_MODE (*op0))
1408 && (nonzero_bits (*op0, GET_MODE (*op0))
1409 & ~(unsigned HOST_WIDE_INT) 0xffff) == 0)
1410 {
1411 *op0 = gen_lowpart (HImode, *op0);
1412 *op1 = constm1_rtx;
1413 }
1414
1415 /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
1416 if (GET_CODE (*op0) == UNSPEC
1417 && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1418 && XVECLEN (*op0, 0) == 1
1419 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1420 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1421 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1422 && *op1 == const0_rtx)
1423 {
1424 enum rtx_code new_code = UNKNOWN;
1425 switch (*code)
1426 {
1427 case EQ: new_code = EQ; break;
1428 case NE: new_code = NE; break;
1429 case LT: new_code = GTU; break;
1430 case GT: new_code = LTU; break;
1431 case LE: new_code = GEU; break;
1432 case GE: new_code = LEU; break;
1433 default: break;
1434 }
1435
1436 if (new_code != UNKNOWN)
1437 {
1438 *op0 = XVECEXP (*op0, 0, 0);
1439 *code = new_code;
1440 }
1441 }
1442
1443 /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
1444 if (GET_CODE (*op0) == UNSPEC
1445 && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1446 && XVECLEN (*op0, 0) == 1
1447 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1448 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1449 && CONST_INT_P (*op1))
1450 {
1451 enum rtx_code new_code = UNKNOWN;
1452 switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1453 {
1454 case CCZmode:
1455 case CCRAWmode:
1456 switch (*code)
1457 {
1458 case EQ: new_code = EQ; break;
1459 case NE: new_code = NE; break;
1460 default: break;
1461 }
1462 break;
1463 default: break;
1464 }
1465
1466 if (new_code != UNKNOWN)
1467 {
1468 /* For CCRAWmode put the required cc mask into the second
1469 operand. */
1470 if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1471 && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1472 *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1473 *op0 = XVECEXP (*op0, 0, 0);
1474 *code = new_code;
1475 }
1476 }
1477
1478 /* Simplify cascaded EQ, NE with const0_rtx. */
1479 if ((*code == NE || *code == EQ)
1480 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1481 && GET_MODE (*op0) == SImode
1482 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1483 && REG_P (XEXP (*op0, 0))
1484 && XEXP (*op0, 1) == const0_rtx
1485 && *op1 == const0_rtx)
1486 {
1487 if ((*code == EQ && GET_CODE (*op0) == NE)
1488 || (*code == NE && GET_CODE (*op0) == EQ))
1489 *code = EQ;
1490 else
1491 *code = NE;
1492 *op0 = XEXP (*op0, 0);
1493 }
1494
1495 /* Prefer register over memory as first operand. */
1496 if (MEM_P (*op0) && REG_P (*op1))
1497 {
1498 rtx tem = *op0; *op0 = *op1; *op1 = tem;
1499 *code = (int)swap_condition ((enum rtx_code)*code);
1500 }
1501
1502 /* Using the scalar variants of vector instructions for 64 bit FP
1503 comparisons might require swapping the operands. */
1504 if (TARGET_VX
1505 && register_operand (*op0, DFmode)
1506 && register_operand (*op1, DFmode)
1507 && (*code == LT || *code == LE || *code == UNGT || *code == UNGE))
1508 {
1509 rtx tmp;
1510
1511 switch (*code)
1512 {
1513 case LT: *code = GT; break;
1514 case LE: *code = GE; break;
1515 case UNGT: *code = UNLE; break;
1516 case UNGE: *code = UNLT; break;
1517 default: ;
1518 }
1519 tmp = *op0; *op0 = *op1; *op1 = tmp;
1520 }
1521 }
1522
1523 /* Helper function for s390_emit_compare. If possible emit a 64 bit
1524 FP compare using the single element variant of vector instructions.
1525 Replace CODE with the comparison code to be used in the CC reg
1526 compare and return the condition code register RTX in CC. */
1527
1528 static bool
1529 s390_expand_vec_compare_scalar (enum rtx_code *code, rtx cmp1, rtx cmp2,
1530 rtx *cc)
1531 {
1532 machine_mode cmp_mode;
1533 bool swap_p = false;
1534
1535 switch (*code)
1536 {
1537 case EQ: cmp_mode = CCVEQmode; break;
1538 case NE: cmp_mode = CCVEQmode; break;
1539 case GT: cmp_mode = CCVFHmode; break;
1540 case GE: cmp_mode = CCVFHEmode; break;
1541 case UNLE: cmp_mode = CCVFHmode; break;
1542 case UNLT: cmp_mode = CCVFHEmode; break;
1543 case LT: cmp_mode = CCVFHmode; *code = GT; swap_p = true; break;
1544 case LE: cmp_mode = CCVFHEmode; *code = GE; swap_p = true; break;
1545 case UNGE: cmp_mode = CCVFHmode; *code = UNLE; swap_p = true; break;
1546 case UNGT: cmp_mode = CCVFHEmode; *code = UNLT; swap_p = true; break;
1547 default: return false;
1548 }
1549
1550 if (swap_p)
1551 {
1552 rtx tmp = cmp2;
1553 cmp2 = cmp1;
1554 cmp1 = tmp;
1555 }
1556 *cc = gen_rtx_REG (cmp_mode, CC_REGNUM);
1557 emit_insn (gen_rtx_PARALLEL (VOIDmode,
1558 gen_rtvec (2,
1559 gen_rtx_SET (*cc,
1560 gen_rtx_COMPARE (cmp_mode, cmp1,
1561 cmp2)),
1562 gen_rtx_CLOBBER (VOIDmode,
1563 gen_rtx_SCRATCH (V2DImode)))));
1564 return true;
1565 }
1566
1567
1568 /* Emit a compare instruction suitable to implement the comparison
1569 OP0 CODE OP1. Return the correct condition RTL to be placed in
1570 the IF_THEN_ELSE of the conditional branch testing the result. */
1571
1572 rtx
1573 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1574 {
1575 machine_mode mode = s390_select_ccmode (code, op0, op1);
1576 rtx cc;
1577
1578 if (TARGET_VX
1579 && register_operand (op0, DFmode)
1580 && register_operand (op1, DFmode)
1581 && s390_expand_vec_compare_scalar (&code, op0, op1, &cc))
1582 {
1583 /* Work has been done by s390_expand_vec_compare_scalar already. */
1584 }
1585 else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1586 {
1587 /* Do not output a redundant compare instruction if a
1588 compare_and_swap pattern already computed the result and the
1589 machine modes are compatible. */
1590 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1591 == GET_MODE (op0));
1592 cc = op0;
1593 }
1594 else
1595 {
1596 cc = gen_rtx_REG (mode, CC_REGNUM);
1597 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1598 }
1599
1600 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1601 }
1602
1603 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1604 matches CMP.
1605 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1606 conditional branch testing the result. */
1607
1608 static rtx
1609 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1610 rtx cmp, rtx new_rtx)
1611 {
1612 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
1613 return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
1614 const0_rtx);
1615 }
1616
1617 /* Emit a jump instruction to TARGET and return it. If COND is
1618 NULL_RTX, emit an unconditional jump, else a conditional jump under
1619 condition COND. */
1620
1621 rtx_insn *
1622 s390_emit_jump (rtx target, rtx cond)
1623 {
1624 rtx insn;
1625
1626 target = gen_rtx_LABEL_REF (VOIDmode, target);
1627 if (cond)
1628 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1629
1630 insn = gen_rtx_SET (pc_rtx, target);
1631 return emit_jump_insn (insn);
1632 }
1633
1634 /* Return branch condition mask to implement a branch
1635 specified by CODE. Return -1 for invalid comparisons. */
1636
1637 int
1638 s390_branch_condition_mask (rtx code)
1639 {
1640 const int CC0 = 1 << 3;
1641 const int CC1 = 1 << 2;
1642 const int CC2 = 1 << 1;
1643 const int CC3 = 1 << 0;
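/* The mask is laid out like the mask field of the BRC/BRCL branch
   instructions: CC0 is the leftmost (most significant) bit. E.g. EQ in
   CCZmode yields mask 8 (branch on CC0 only) and NE yields mask 7. */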
1644
1645 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1646 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1647 gcc_assert (XEXP (code, 1) == const0_rtx
1648 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1649 && CONST_INT_P (XEXP (code, 1))));
1650
1651
1652 switch (GET_MODE (XEXP (code, 0)))
1653 {
1654 case CCZmode:
1655 case CCZ1mode:
1656 switch (GET_CODE (code))
1657 {
1658 case EQ: return CC0;
1659 case NE: return CC1 | CC2 | CC3;
1660 default: return -1;
1661 }
1662 break;
1663
1664 case CCT1mode:
1665 switch (GET_CODE (code))
1666 {
1667 case EQ: return CC1;
1668 case NE: return CC0 | CC2 | CC3;
1669 default: return -1;
1670 }
1671 break;
1672
1673 case CCT2mode:
1674 switch (GET_CODE (code))
1675 {
1676 case EQ: return CC2;
1677 case NE: return CC0 | CC1 | CC3;
1678 default: return -1;
1679 }
1680 break;
1681
1682 case CCT3mode:
1683 switch (GET_CODE (code))
1684 {
1685 case EQ: return CC3;
1686 case NE: return CC0 | CC1 | CC2;
1687 default: return -1;
1688 }
1689 break;
1690
1691 case CCLmode:
1692 switch (GET_CODE (code))
1693 {
1694 case EQ: return CC0 | CC2;
1695 case NE: return CC1 | CC3;
1696 default: return -1;
1697 }
1698 break;
1699
1700 case CCL1mode:
1701 switch (GET_CODE (code))
1702 {
1703 case LTU: return CC2 | CC3; /* carry */
1704 case GEU: return CC0 | CC1; /* no carry */
1705 default: return -1;
1706 }
1707 break;
1708
1709 case CCL2mode:
1710 switch (GET_CODE (code))
1711 {
1712 case GTU: return CC0 | CC1; /* borrow */
1713 case LEU: return CC2 | CC3; /* no borrow */
1714 default: return -1;
1715 }
1716 break;
1717
1718 case CCL3mode:
1719 switch (GET_CODE (code))
1720 {
1721 case EQ: return CC0 | CC2;
1722 case NE: return CC1 | CC3;
1723 case LTU: return CC1;
1724 case GTU: return CC3;
1725 case LEU: return CC1 | CC2;
1726 case GEU: return CC2 | CC3;
1727 default: return -1;
1728 }
1729
1730 case CCUmode:
1731 switch (GET_CODE (code))
1732 {
1733 case EQ: return CC0;
1734 case NE: return CC1 | CC2 | CC3;
1735 case LTU: return CC1;
1736 case GTU: return CC2;
1737 case LEU: return CC0 | CC1;
1738 case GEU: return CC0 | CC2;
1739 default: return -1;
1740 }
1741 break;
1742
1743 case CCURmode:
1744 switch (GET_CODE (code))
1745 {
1746 case EQ: return CC0;
1747 case NE: return CC2 | CC1 | CC3;
1748 case LTU: return CC2;
1749 case GTU: return CC1;
1750 case LEU: return CC0 | CC2;
1751 case GEU: return CC0 | CC1;
1752 default: return -1;
1753 }
1754 break;
1755
1756 case CCAPmode:
1757 switch (GET_CODE (code))
1758 {
1759 case EQ: return CC0;
1760 case NE: return CC1 | CC2 | CC3;
1761 case LT: return CC1 | CC3;
1762 case GT: return CC2;
1763 case LE: return CC0 | CC1 | CC3;
1764 case GE: return CC0 | CC2;
1765 default: return -1;
1766 }
1767 break;
1768
1769 case CCANmode:
1770 switch (GET_CODE (code))
1771 {
1772 case EQ: return CC0;
1773 case NE: return CC1 | CC2 | CC3;
1774 case LT: return CC1;
1775 case GT: return CC2 | CC3;
1776 case LE: return CC0 | CC1;
1777 case GE: return CC0 | CC2 | CC3;
1778 default: return -1;
1779 }
1780 break;
1781
1782 case CCSmode:
1783 switch (GET_CODE (code))
1784 {
1785 case EQ: return CC0;
1786 case NE: return CC1 | CC2 | CC3;
1787 case LT: return CC1;
1788 case GT: return CC2;
1789 case LE: return CC0 | CC1;
1790 case GE: return CC0 | CC2;
1791 case UNORDERED: return CC3;
1792 case ORDERED: return CC0 | CC1 | CC2;
1793 case UNEQ: return CC0 | CC3;
1794 case UNLT: return CC1 | CC3;
1795 case UNGT: return CC2 | CC3;
1796 case UNLE: return CC0 | CC1 | CC3;
1797 case UNGE: return CC0 | CC2 | CC3;
1798 case LTGT: return CC1 | CC2;
1799 default: return -1;
1800 }
1801 break;
1802
1803 case CCSRmode:
1804 switch (GET_CODE (code))
1805 {
1806 case EQ: return CC0;
1807 case NE: return CC2 | CC1 | CC3;
1808 case LT: return CC2;
1809 case GT: return CC1;
1810 case LE: return CC0 | CC2;
1811 case GE: return CC0 | CC1;
1812 case UNORDERED: return CC3;
1813 case ORDERED: return CC0 | CC2 | CC1;
1814 case UNEQ: return CC0 | CC3;
1815 case UNLT: return CC2 | CC3;
1816 case UNGT: return CC1 | CC3;
1817 case UNLE: return CC0 | CC2 | CC3;
1818 case UNGE: return CC0 | CC1 | CC3;
1819 case LTGT: return CC2 | CC1;
1820 default: return -1;
1821 }
1822 break;
1823
1824 /* Vector comparison modes. */
1825
1826 case CCVEQmode:
1827 switch (GET_CODE (code))
1828 {
1829 case EQ: return CC0;
1830 case NE: return CC3;
1831 default: return -1;
1832 }
1833
1834 case CCVEQANYmode:
1835 switch (GET_CODE (code))
1836 {
1837 case EQ: return CC0 | CC1;
1838 case NE: return CC3 | CC1;
1839 default: return -1;
1840 }
1841
1842 /* Integer vector compare modes. */
1843
1844 case CCVHmode:
1845 switch (GET_CODE (code))
1846 {
1847 case GT: return CC0;
1848 case LE: return CC3;
1849 default: return -1;
1850 }
1851
1852 case CCVHANYmode:
1853 switch (GET_CODE (code))
1854 {
1855 case GT: return CC0 | CC1;
1856 case LE: return CC3 | CC1;
1857 default: return -1;
1858 }
1859
1860 case CCVHUmode:
1861 switch (GET_CODE (code))
1862 {
1863 case GTU: return CC0;
1864 case LEU: return CC3;
1865 default: return -1;
1866 }
1867
1868 case CCVHUANYmode:
1869 switch (GET_CODE (code))
1870 {
1871 case GTU: return CC0 | CC1;
1872 case LEU: return CC3 | CC1;
1873 default: return -1;
1874 }
1875
1876 /* FP vector compare modes. */
1877
1878 case CCVFHmode:
1879 switch (GET_CODE (code))
1880 {
1881 case GT: return CC0;
1882 case UNLE: return CC3;
1883 default: return -1;
1884 }
1885
1886 case CCVFHANYmode:
1887 switch (GET_CODE (code))
1888 {
1889 case GT: return CC0 | CC1;
1890 case UNLE: return CC3 | CC1;
1891 default: return -1;
1892 }
1893
1894 case CCVFHEmode:
1895 switch (GET_CODE (code))
1896 {
1897 case GE: return CC0;
1898 case UNLT: return CC3;
1899 default: return -1;
1900 }
1901
1902 case CCVFHEANYmode:
1903 switch (GET_CODE (code))
1904 {
1905 case GE: return CC0 | CC1;
1906 case UNLT: return CC3 | CC1;
1907 default: return -1;
1908 }
1909
1910
1911 case CCRAWmode:
1912 switch (GET_CODE (code))
1913 {
1914 case EQ:
1915 return INTVAL (XEXP (code, 1));
1916 case NE:
1917 return (INTVAL (XEXP (code, 1))) ^ 0xf;
1918 default:
1919 gcc_unreachable ();
1920 }
1921
1922 default:
1923 return -1;
1924 }
1925 }
1926
1927
1928 /* Return branch condition mask to implement a compare and branch
1929 specified by CODE. Return -1 for invalid comparisons. */
1930
1931 int
1932 s390_compare_and_branch_condition_mask (rtx code)
1933 {
1934 const int CC0 = 1 << 3;
1935 const int CC1 = 1 << 2;
1936 const int CC2 = 1 << 1;
1937
1938 switch (GET_CODE (code))
1939 {
1940 case EQ:
1941 return CC0;
1942 case NE:
1943 return CC1 | CC2;
1944 case LT:
1945 case LTU:
1946 return CC1;
1947 case GT:
1948 case GTU:
1949 return CC2;
1950 case LE:
1951 case LEU:
1952 return CC0 | CC1;
1953 case GE:
1954 case GEU:
1955 return CC0 | CC2;
1956 default:
1957 gcc_unreachable ();
1958 }
1959 return -1;
1960 }
1961
1962 /* If INV is false, return assembler mnemonic string to implement
1963 a branch specified by CODE. If INV is true, return mnemonic
1964 for the corresponding inverted branch. */
1965
1966 static const char *
1967 s390_branch_condition_mnemonic (rtx code, int inv)
1968 {
1969 int mask;
1970
1971 static const char *const mnemonic[16] =
1972 {
1973 NULL, "o", "h", "nle",
1974 "l", "nhe", "lh", "ne",
1975 "e", "nlh", "he", "nl",
1976 "le", "nh", "no", NULL
1977 };
1978
1979 if (GET_CODE (XEXP (code, 0)) == REG
1980 && REGNO (XEXP (code, 0)) == CC_REGNUM
1981 && (XEXP (code, 1) == const0_rtx
1982 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1983 && CONST_INT_P (XEXP (code, 1)))))
1984 mask = s390_branch_condition_mask (code);
1985 else
1986 mask = s390_compare_and_branch_condition_mask (code);
1987
1988 gcc_assert (mask >= 0);
1989
1990 if (inv)
1991 mask ^= 15;
1992
1993 gcc_assert (mask >= 1 && mask <= 14);
1994
1995 return mnemonic[mask];
1996 }
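/* As an illustrative example of the mask-to-mnemonic mapping above (using
   the bits from s390_compare_and_branch_condition_mask, CC0 = 8, CC1 = 4,
   CC2 = 2): EQ yields mask 8 and hence the mnemonic "e", NE yields mask 6
   ("lh"), and GE/GEU yield mask 10 ("he").  With INV set, EQ's mask becomes
   8 ^ 15 = 7, selecting the inverted mnemonic "ne".  */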
1997
1998 /* Return the part of op which has a value different from def.
1999 The size of the part is determined by mode.
2000 Use this function only if you already know that op really
2001 contains such a part. */
2002
2003 unsigned HOST_WIDE_INT
2004 s390_extract_part (rtx op, machine_mode mode, int def)
2005 {
2006 unsigned HOST_WIDE_INT value = 0;
2007 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2008 int part_bits = GET_MODE_BITSIZE (mode);
2009 unsigned HOST_WIDE_INT part_mask
2010 = ((unsigned HOST_WIDE_INT)1 << part_bits) - 1;
2011 int i;
2012
2013 for (i = 0; i < max_parts; i++)
2014 {
2015 if (i == 0)
2016 value = (unsigned HOST_WIDE_INT) INTVAL (op);
2017 else
2018 value >>= part_bits;
2019
2020 if ((value & part_mask) != (def & part_mask))
2021 return value & part_mask;
2022 }
2023
2024 gcc_unreachable ();
2025 }
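/* For instance, on a host with 64-bit HOST_WIDE_INT a call such as
   s390_extract_part (GEN_INT (0x0000000012340000), HImode, 0) would scan
   the four 16-bit parts from the least significant end and return 0x1234,
   the only part that differs from DEF.  */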
2026
2027 /* If OP is an integer constant of mode MODE with exactly one
2028 part of mode PART_MODE unequal to DEF, return the number of that
2029 part. Otherwise, return -1. */
2030
2031 int
2032 s390_single_part (rtx op,
2033 machine_mode mode,
2034 machine_mode part_mode,
2035 int def)
2036 {
2037 unsigned HOST_WIDE_INT value = 0;
2038 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2039 unsigned HOST_WIDE_INT part_mask
2040 = ((unsigned HOST_WIDE_INT)1 << GET_MODE_BITSIZE (part_mode)) - 1;
2041 int i, part = -1;
2042
2043 if (GET_CODE (op) != CONST_INT)
2044 return -1;
2045
2046 for (i = 0; i < n_parts; i++)
2047 {
2048 if (i == 0)
2049 value = (unsigned HOST_WIDE_INT) INTVAL (op);
2050 else
2051 value >>= GET_MODE_BITSIZE (part_mode);
2052
2053 if ((value & part_mask) != (def & part_mask))
2054 {
2055 if (part != -1)
2056 return -1;
2057 else
2058 part = i;
2059 }
2060 }
2061 return part == -1 ? -1 : n_parts - 1 - part;
2062 }
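/* For instance, s390_single_part (GEN_INT (0x0000000012340000), DImode,
   HImode, 0) would return 2: the only halfword differing from DEF is the
   third one counting from the most significant end.  A value such as
   0x1234000000005678 has two such halfwords and would yield -1.  */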
2063
2064 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2065 bits and no other bits are set in IN. POS and LENGTH can be used
2066 to obtain the start position and the length of the bitfield.
2067
2068 POS gives the position of the first bit of the bitfield counting
2069 from the lowest order bit starting with zero. In order to use this
2070 value for S/390 instructions this has to be converted to "bits big
2071 endian" style. */
2072
2073 bool
2074 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size,
2075 int *pos, int *length)
2076 {
2077 int tmp_pos = 0;
2078 int tmp_length = 0;
2079 int i;
2080 unsigned HOST_WIDE_INT mask = 1ULL;
2081 bool contiguous = false;
2082
2083 for (i = 0; i < size; mask <<= 1, i++)
2084 {
2085 if (contiguous)
2086 {
2087 if (mask & in)
2088 tmp_length++;
2089 else
2090 break;
2091 }
2092 else
2093 {
2094 if (mask & in)
2095 {
2096 contiguous = true;
2097 tmp_length++;
2098 }
2099 else
2100 tmp_pos++;
2101 }
2102 }
2103
2104 if (!tmp_length)
2105 return false;
2106
2107 /* Calculate a mask for all bits beyond the contiguous bits. */
2108 mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1));
2109
2110 if ((unsigned)size < sizeof (HOST_WIDE_INT) * BITS_PER_UNIT)
2111 mask &= (HOST_WIDE_INT_1U << size) - 1;
2112
2113 if (mask & in)
2114 return false;
2115
2116 if (tmp_length + tmp_pos - 1 > size)
2117 return false;
2118
2119 if (length)
2120 *length = tmp_length;
2121
2122 if (pos)
2123 *pos = tmp_pos;
2124
2125 return true;
2126 }
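/* For instance, s390_contiguous_bitmask_p (0x0ff0, 16, &pos, &len) would
   return true with pos == 4 and len == 8, while 0x0f50 would be rejected
   because its set bits are not contiguous.  */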
2127
2128 /* Return true if OP contains the same contiguous bitfield in *all*
2129 its elements. START and END can be used to obtain the start and
2130 end position of the bitfield.
2131
2132    START/END give the position of the first/last bit of the bitfield
2133 counting from the lowest order bit starting with zero. In order to
2134 use these values for S/390 instructions this has to be converted to
2135 "bits big endian" style. */
2136
2137 bool
2138 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2139 {
2140 unsigned HOST_WIDE_INT mask;
2141 int length, size;
2142
2143 if (!VECTOR_MODE_P (GET_MODE (op))
2144 || GET_CODE (op) != CONST_VECTOR
2145 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2146 return false;
2147
2148 if (GET_MODE_NUNITS (GET_MODE (op)) > 1)
2149 {
2150 int i;
2151
2152 for (i = 1; i < GET_MODE_NUNITS (GET_MODE (op)); ++i)
2153 if (!rtx_equal_p (XVECEXP (op, 0, i), XVECEXP (op, 0, 0)))
2154 return false;
2155 }
2156
2157 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2158 mask = UINTVAL (XVECEXP (op, 0, 0));
2159 if (s390_contiguous_bitmask_p (mask, size, start,
2160 end != NULL ? &length : NULL))
2161 {
2162 if (end != NULL)
2163 *end = *start + length - 1;
2164 return true;
2165 }
2166 /* 0xff00000f style immediates can be covered by swapping start and
2167 end indices in vgm. */
2168 if (s390_contiguous_bitmask_p (~mask, size, start,
2169 end != NULL ? &length : NULL))
2170 {
2171 if (end != NULL)
2172 *end = *start - 1;
2173 if (start != NULL)
2174 *start = *start + length;
2175 return true;
2176 }
2177 return false;
2178 }
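/* As a sketch of the swapped case mentioned above: for a CONST_VECTOR whose
   SImode elements all equal 0xff00000f, the non-inverted test fails, but the
   inverted mask 0x00fffff0 is contiguous (pos 4, length 20), so the function
   would return START == 24 and END == 3, i.e. a wrap-around range suitable
   for vgm.  */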
2179
2180 /* Return true if C consists only of byte chunks being either 0 or
2181 0xff. If MASK is !=NULL a byte mask is generated which is
2182 appropriate for the vector generate byte mask instruction. */
2183
2184 bool
2185 s390_bytemask_vector_p (rtx op, unsigned *mask)
2186 {
2187 int i;
2188 unsigned tmp_mask = 0;
2189 int nunit, unit_size;
2190
2191 if (!VECTOR_MODE_P (GET_MODE (op))
2192 || GET_CODE (op) != CONST_VECTOR
2193 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2194 return false;
2195
2196 nunit = GET_MODE_NUNITS (GET_MODE (op));
2197 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2198
2199 for (i = 0; i < nunit; i++)
2200 {
2201 unsigned HOST_WIDE_INT c;
2202 int j;
2203
2204 if (!CONST_INT_P (XVECEXP (op, 0, i)))
2205 return false;
2206
2207 c = UINTVAL (XVECEXP (op, 0, i));
2208 for (j = 0; j < unit_size; j++)
2209 {
2210 if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2211 return false;
2212 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2213 c = c >> BITS_PER_UNIT;
2214 }
2215 }
2216
2217 if (mask != NULL)
2218 *mask = tmp_mask;
2219
2220 return true;
2221 }
2222
2223 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2224 equivalent to a shift followed by the AND. In particular, CONTIG
2225 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2226 for ROTL indicate a rotate to the right. */
2227
2228 bool
2229 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2230 {
2231 int pos, len;
2232 bool ok;
2233
2234 ok = s390_contiguous_bitmask_p (contig, bitsize, &pos, &len);
2235 gcc_assert (ok);
2236
2237 return ((rotl >= 0 && rotl <= pos)
2238 || (rotl < 0 && -rotl <= bitsize - len - pos));
2239 }
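/* For instance, with CONTIG == 0xff00 in a 64-bit field (pos 8, len 8),
   s390_extzv_shift_ok (64, 4, 0xff00) would return true (4 <= pos), while a
   rotate of 12 would be rejected because the mask would then cover bits
   that wrapped around from the other end of the register.  */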
2240
2241 /* Check whether we can (and want to) split a double-word
2242 move in mode MODE from SRC to DST into two single-word
2243 moves, moving the subword FIRST_SUBWORD first. */
2244
2245 bool
2246 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2247 {
2248 /* Floating point and vector registers cannot be split. */
2249 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2250 return false;
2251
2252 /* We don't need to split if operands are directly accessible. */
2253 if (s_operand (src, mode) || s_operand (dst, mode))
2254 return false;
2255
2256 /* Non-offsettable memory references cannot be split. */
2257 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2258 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2259 return false;
2260
2261 /* Moving the first subword must not clobber a register
2262 needed to move the second subword. */
2263 if (register_operand (dst, mode))
2264 {
2265 rtx subreg = operand_subword (dst, first_subword, 0, mode);
2266 if (reg_overlap_mentioned_p (subreg, src))
2267 return false;
2268 }
2269
2270 return true;
2271 }
2272
2273 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2274 and [MEM2, MEM2 + SIZE] do overlap and false
2275 otherwise. */
2276
2277 bool
2278 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2279 {
2280 rtx addr1, addr2, addr_delta;
2281 HOST_WIDE_INT delta;
2282
2283 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2284 return true;
2285
2286 if (size == 0)
2287 return false;
2288
2289 addr1 = XEXP (mem1, 0);
2290 addr2 = XEXP (mem2, 0);
2291
2292 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2293
2294 /* This overlapping check is used by peepholes merging memory block operations.
2295 Overlapping operations would otherwise be recognized by the S/390 hardware
2296 and would fall back to a slower implementation. Allowing overlapping
2297 operations would lead to slow code but not to wrong code. Therefore we are
2298 somewhat optimistic if we cannot prove that the memory blocks are
2299 overlapping.
2300 That's why we return false here although this may accept operations on
2301 overlapping memory areas. */
2302 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2303 return false;
2304
2305 delta = INTVAL (addr_delta);
2306
2307 if (delta == 0
2308 || (delta > 0 && delta < size)
2309 || (delta < 0 && -delta < size))
2310 return true;
2311
2312 return false;
2313 }
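/* Roughly: if the two addresses provably differ by 8 bytes and SIZE is 16,
   the blocks overlap and true is returned; with SIZE == 8 they are merely
   adjacent and the result is false.  If the address difference cannot be
   simplified to a constant, false is returned as well -- the optimistic
   answer described in the comment above.  */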
2314
2315 /* Check whether the address of memory reference MEM2 equals exactly
2316 the address of memory reference MEM1 plus DELTA. Return true if
2317 we can prove this to be the case, false otherwise. */
2318
2319 bool
2320 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2321 {
2322 rtx addr1, addr2, addr_delta;
2323
2324 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2325 return false;
2326
2327 addr1 = XEXP (mem1, 0);
2328 addr2 = XEXP (mem2, 0);
2329
2330 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2331 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2332 return false;
2333
2334 return true;
2335 }
2336
2337 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2338
2339 void
2340 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2341 rtx *operands)
2342 {
2343 machine_mode wmode = mode;
2344 rtx dst = operands[0];
2345 rtx src1 = operands[1];
2346 rtx src2 = operands[2];
2347 rtx op, clob, tem;
2348
2349 /* If we cannot handle the operation directly, use a temp register. */
2350 if (!s390_logical_operator_ok_p (operands))
2351 dst = gen_reg_rtx (mode);
2352
2353 /* QImode and HImode patterns make sense only if we have a destination
2354 in memory. Otherwise perform the operation in SImode. */
2355 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2356 wmode = SImode;
2357
2358 /* Widen operands if required. */
2359 if (mode != wmode)
2360 {
2361 if (GET_CODE (dst) == SUBREG
2362 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2363 dst = tem;
2364 else if (REG_P (dst))
2365 dst = gen_rtx_SUBREG (wmode, dst, 0);
2366 else
2367 dst = gen_reg_rtx (wmode);
2368
2369 if (GET_CODE (src1) == SUBREG
2370 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2371 src1 = tem;
2372 else if (GET_MODE (src1) != VOIDmode)
2373 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2374
2375 if (GET_CODE (src2) == SUBREG
2376 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2377 src2 = tem;
2378 else if (GET_MODE (src2) != VOIDmode)
2379 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2380 }
2381
2382 /* Emit the instruction. */
2383 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2384 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2385 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2386
2387 /* Fix up the destination if needed. */
2388 if (dst != operands[0])
2389 emit_move_insn (operands[0], gen_lowpart (mode, dst));
2390 }
2391
2392 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2393
2394 bool
2395 s390_logical_operator_ok_p (rtx *operands)
2396 {
2397 /* If the destination operand is in memory, it needs to coincide
2398 with one of the source operands. After reload, it has to be
2399 the first source operand. */
2400 if (GET_CODE (operands[0]) == MEM)
2401 return rtx_equal_p (operands[0], operands[1])
2402 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2403
2404 return true;
2405 }
2406
2407 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2408 operand IMMOP to switch from SS to SI type instructions. */
2409
2410 void
2411 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2412 {
2413 int def = code == AND ? -1 : 0;
2414 HOST_WIDE_INT mask;
2415 int part;
2416
2417 gcc_assert (GET_CODE (*memop) == MEM);
2418 gcc_assert (!MEM_VOLATILE_P (*memop));
2419
2420 mask = s390_extract_part (*immop, QImode, def);
2421 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2422 gcc_assert (part >= 0);
2423
2424 *memop = adjust_address (*memop, QImode, part);
2425 *immop = gen_int_mode (mask, QImode);
2426 }
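/* For instance (values chosen for illustration), an AND of a HImode memory
   operand with an immediate of 0xff0f has DEF == -1; the only QImode part
   differing from 0xff is 0x0f in part 1, so *MEMOP becomes a QImode
   reference to byte 1 and *IMMOP becomes 0x0f -- turning the halfword
   operation into a single-byte SI-type instruction.  */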
2427
2428
2429 /* How to allocate a 'struct machine_function'. */
2430
2431 static struct machine_function *
2432 s390_init_machine_status (void)
2433 {
2434 return ggc_cleared_alloc<machine_function> ();
2435 }
2436
2437 /* Map for smallest class containing reg regno. */
2438
2439 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2440 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2441 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2442 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2443 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2444 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2445 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2446 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2447 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2448 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2449 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2450 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2451 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2452 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2453 VEC_REGS, VEC_REGS /* 52 */
2454 };
2455
2456 /* Return attribute type of insn. */
2457
2458 static enum attr_type
2459 s390_safe_attr_type (rtx_insn *insn)
2460 {
2461 if (recog_memoized (insn) >= 0)
2462 return get_attr_type (insn);
2463 else
2464 return TYPE_NONE;
2465 }
2466
2467 /* Return true if DISP is a valid short displacement. */
2468
2469 static bool
2470 s390_short_displacement (rtx disp)
2471 {
2472 /* No displacement is OK. */
2473 if (!disp)
2474 return true;
2475
2476 /* Without the long displacement facility we don't need to
2477      distinguish between long and short displacement. */
2478 if (!TARGET_LONG_DISPLACEMENT)
2479 return true;
2480
2481 /* Integer displacement in range. */
2482 if (GET_CODE (disp) == CONST_INT)
2483 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2484
2485 /* GOT offset is not OK, the GOT can be large. */
2486 if (GET_CODE (disp) == CONST
2487 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2488 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2489 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2490 return false;
2491
2492 /* All other symbolic constants are literal pool references,
2493 which are OK as the literal pool must be small. */
2494 if (GET_CODE (disp) == CONST)
2495 return true;
2496
2497 return false;
2498 }
2499
2500 /* Decompose a RTL expression ADDR for a memory address into
2501 its components, returned in OUT.
2502
2503 Returns false if ADDR is not a valid memory address, true
2504 otherwise. If OUT is NULL, don't return the components,
2505 but check for validity only.
2506
2507 Note: Only addresses in canonical form are recognized.
2508 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2509 canonical form so that they will be recognized. */
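/* For illustration (register names arbitrary): an address such as
   (plus (reg B) (const_int 16)) would decompose into base B, no index and
   displacement 16, while (plus (plus (reg X) (reg B)) (const_int 100))
   would yield index X, base B and displacement 100, subject to the
   base/index preference handling further down.  */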
2510
2511 static int
2512 s390_decompose_address (rtx addr, struct s390_address *out)
2513 {
2514 HOST_WIDE_INT offset = 0;
2515 rtx base = NULL_RTX;
2516 rtx indx = NULL_RTX;
2517 rtx disp = NULL_RTX;
2518 rtx orig_disp;
2519 bool pointer = false;
2520 bool base_ptr = false;
2521 bool indx_ptr = false;
2522 bool literal_pool = false;
2523
2524 /* We may need to substitute the literal pool base register into the address
2525 below. However, at this point we do not know which register is going to
2526 be used as base, so we substitute the arg pointer register. This is going
2527 to be treated as holding a pointer below -- it shouldn't be used for any
2528 other purpose. */
2529 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2530
2531 /* Decompose address into base + index + displacement. */
2532
2533 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2534 base = addr;
2535
2536 else if (GET_CODE (addr) == PLUS)
2537 {
2538 rtx op0 = XEXP (addr, 0);
2539 rtx op1 = XEXP (addr, 1);
2540 enum rtx_code code0 = GET_CODE (op0);
2541 enum rtx_code code1 = GET_CODE (op1);
2542
2543 if (code0 == REG || code0 == UNSPEC)
2544 {
2545 if (code1 == REG || code1 == UNSPEC)
2546 {
2547 indx = op0; /* index + base */
2548 base = op1;
2549 }
2550
2551 else
2552 {
2553 base = op0; /* base + displacement */
2554 disp = op1;
2555 }
2556 }
2557
2558 else if (code0 == PLUS)
2559 {
2560 indx = XEXP (op0, 0); /* index + base + disp */
2561 base = XEXP (op0, 1);
2562 disp = op1;
2563 }
2564
2565 else
2566 {
2567 return false;
2568 }
2569 }
2570
2571 else
2572 disp = addr; /* displacement */
2573
2574 /* Extract integer part of displacement. */
2575 orig_disp = disp;
2576 if (disp)
2577 {
2578 if (GET_CODE (disp) == CONST_INT)
2579 {
2580 offset = INTVAL (disp);
2581 disp = NULL_RTX;
2582 }
2583 else if (GET_CODE (disp) == CONST
2584 && GET_CODE (XEXP (disp, 0)) == PLUS
2585 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2586 {
2587 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2588 disp = XEXP (XEXP (disp, 0), 0);
2589 }
2590 }
2591
2592 /* Strip off CONST here to avoid special case tests later. */
2593 if (disp && GET_CODE (disp) == CONST)
2594 disp = XEXP (disp, 0);
2595
2596 /* We can convert literal pool addresses to
2597 displacements by basing them off the base register. */
2598 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2599 {
2600 /* Either base or index must be free to hold the base register. */
2601 if (!base)
2602 base = fake_pool_base, literal_pool = true;
2603 else if (!indx)
2604 indx = fake_pool_base, literal_pool = true;
2605 else
2606 return false;
2607
2608 /* Mark up the displacement. */
2609 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2610 UNSPEC_LTREL_OFFSET);
2611 }
2612
2613 /* Validate base register. */
2614 if (base)
2615 {
2616 if (GET_CODE (base) == UNSPEC)
2617 switch (XINT (base, 1))
2618 {
2619 case UNSPEC_LTREF:
2620 if (!disp)
2621 disp = gen_rtx_UNSPEC (Pmode,
2622 gen_rtvec (1, XVECEXP (base, 0, 0)),
2623 UNSPEC_LTREL_OFFSET);
2624 else
2625 return false;
2626
2627 base = XVECEXP (base, 0, 1);
2628 break;
2629
2630 case UNSPEC_LTREL_BASE:
2631 if (XVECLEN (base, 0) == 1)
2632 base = fake_pool_base, literal_pool = true;
2633 else
2634 base = XVECEXP (base, 0, 1);
2635 break;
2636
2637 default:
2638 return false;
2639 }
2640
2641 if (!REG_P (base)
2642 || (GET_MODE (base) != SImode
2643 && GET_MODE (base) != Pmode))
2644 return false;
2645
2646 if (REGNO (base) == STACK_POINTER_REGNUM
2647 || REGNO (base) == FRAME_POINTER_REGNUM
2648 || ((reload_completed || reload_in_progress)
2649 && frame_pointer_needed
2650 && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
2651 || REGNO (base) == ARG_POINTER_REGNUM
2652 || (flag_pic
2653 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
2654 pointer = base_ptr = true;
2655
2656 if ((reload_completed || reload_in_progress)
2657 && base == cfun->machine->base_reg)
2658 pointer = base_ptr = literal_pool = true;
2659 }
2660
2661 /* Validate index register. */
2662 if (indx)
2663 {
2664 if (GET_CODE (indx) == UNSPEC)
2665 switch (XINT (indx, 1))
2666 {
2667 case UNSPEC_LTREF:
2668 if (!disp)
2669 disp = gen_rtx_UNSPEC (Pmode,
2670 gen_rtvec (1, XVECEXP (indx, 0, 0)),
2671 UNSPEC_LTREL_OFFSET);
2672 else
2673 return false;
2674
2675 indx = XVECEXP (indx, 0, 1);
2676 break;
2677
2678 case UNSPEC_LTREL_BASE:
2679 if (XVECLEN (indx, 0) == 1)
2680 indx = fake_pool_base, literal_pool = true;
2681 else
2682 indx = XVECEXP (indx, 0, 1);
2683 break;
2684
2685 default:
2686 return false;
2687 }
2688
2689 if (!REG_P (indx)
2690 || (GET_MODE (indx) != SImode
2691 && GET_MODE (indx) != Pmode))
2692 return false;
2693
2694 if (REGNO (indx) == STACK_POINTER_REGNUM
2695 || REGNO (indx) == FRAME_POINTER_REGNUM
2696 || ((reload_completed || reload_in_progress)
2697 && frame_pointer_needed
2698 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
2699 || REGNO (indx) == ARG_POINTER_REGNUM
2700 || (flag_pic
2701 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
2702 pointer = indx_ptr = true;
2703
2704 if ((reload_completed || reload_in_progress)
2705 && indx == cfun->machine->base_reg)
2706 pointer = indx_ptr = literal_pool = true;
2707 }
2708
2709 /* Prefer to use pointer as base, not index. */
2710 if (base && indx && !base_ptr
2711 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2712 {
2713 rtx tmp = base;
2714 base = indx;
2715 indx = tmp;
2716 }
2717
2718 /* Validate displacement. */
2719 if (!disp)
2720 {
2721 /* If virtual registers are involved, the displacement will change later
2722 anyway as the virtual registers get eliminated. This could make a
2723 valid displacement invalid, but it is more likely to make an invalid
2724 displacement valid, because we sometimes access the register save area
2725 via negative offsets to one of those registers.
2726 Thus we don't check the displacement for validity here. If after
2727 elimination the displacement turns out to be invalid after all,
2728 this is fixed up by reload in any case. */
2729       /* LRA always keeps displacements up to date, and we need the
2730 	 displacement to be correct throughout LRA, not only at the
2731 	 final elimination. */
2732 if (lra_in_progress
2733 || (base != arg_pointer_rtx
2734 && indx != arg_pointer_rtx
2735 && base != return_address_pointer_rtx
2736 && indx != return_address_pointer_rtx
2737 && base != frame_pointer_rtx
2738 && indx != frame_pointer_rtx
2739 && base != virtual_stack_vars_rtx
2740 && indx != virtual_stack_vars_rtx))
2741 if (!DISP_IN_RANGE (offset))
2742 return false;
2743 }
2744 else
2745 {
2746 /* All the special cases are pointers. */
2747 pointer = true;
2748
2749 /* In the small-PIC case, the linker converts @GOT
2750 and @GOTNTPOFF offsets to possible displacements. */
2751 if (GET_CODE (disp) == UNSPEC
2752 && (XINT (disp, 1) == UNSPEC_GOT
2753 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
2754 && flag_pic == 1)
2755 {
2756 ;
2757 }
2758
2759 /* Accept pool label offsets. */
2760 else if (GET_CODE (disp) == UNSPEC
2761 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
2762 ;
2763
2764 /* Accept literal pool references. */
2765 else if (GET_CODE (disp) == UNSPEC
2766 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
2767 {
2768 /* In case CSE pulled a non literal pool reference out of
2769 the pool we have to reject the address. This is
2770 especially important when loading the GOT pointer on non
2771 zarch CPUs. In this case the literal pool contains an lt
2772 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
2773 will most likely exceed the displacement. */
2774 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2775 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
2776 return false;
2777
2778 orig_disp = gen_rtx_CONST (Pmode, disp);
2779 if (offset)
2780 {
2781 /* If we have an offset, make sure it does not
2782 exceed the size of the constant pool entry. */
2783 rtx sym = XVECEXP (disp, 0, 0);
2784 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
2785 return false;
2786
2787 orig_disp = plus_constant (Pmode, orig_disp, offset);
2788 }
2789 }
2790
2791 else
2792 return false;
2793 }
2794
2795 if (!base && !indx)
2796 pointer = true;
2797
2798 if (out)
2799 {
2800 out->base = base;
2801 out->indx = indx;
2802 out->disp = orig_disp;
2803 out->pointer = pointer;
2804 out->literal_pool = literal_pool;
2805 }
2806
2807 return true;
2808 }
2809
2810 /* Decompose a RTL expression OP for a shift count into its components,
2811 and return the base register in BASE and the offset in OFFSET.
2812
2813 Return true if OP is a valid shift count, false if not. */
2814
2815 bool
2816 s390_decompose_shift_count (rtx op, rtx *base, HOST_WIDE_INT *offset)
2817 {
2818 HOST_WIDE_INT off = 0;
2819
2820 /* We can have an integer constant, an address register,
2821 or a sum of the two. */
2822 if (GET_CODE (op) == CONST_INT)
2823 {
2824 off = INTVAL (op);
2825 op = NULL_RTX;
2826 }
2827 if (op && GET_CODE (op) == PLUS && GET_CODE (XEXP (op, 1)) == CONST_INT)
2828 {
2829 off = INTVAL (XEXP (op, 1));
2830 op = XEXP (op, 0);
2831 }
2832 while (op && GET_CODE (op) == SUBREG)
2833 op = SUBREG_REG (op);
2834
2835 if (op && GET_CODE (op) != REG)
2836 return false;
2837
2838 if (offset)
2839 *offset = off;
2840 if (base)
2841 *base = op;
2842
2843 return true;
2844 }
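/* For instance, a shift count of (plus (reg R) (const_int 7)) would give
   *BASE == R and *OFFSET == 7, a plain (const_int 3) gives a NULL base and
   offset 3, and any SUBREGs around the register are stripped.  */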
2845
2846
2847 /* Return true if CODE is a valid address without index. */
2848
2849 bool
2850 s390_legitimate_address_without_index_p (rtx op)
2851 {
2852 struct s390_address addr;
2853
2854 if (!s390_decompose_address (XEXP (op, 0), &addr))
2855 return false;
2856 if (addr.indx)
2857 return false;
2858
2859 return true;
2860 }
2861
2862
2863 /* Return TRUE if ADDR is an operand valid for a load/store relative
2864 instruction. Be aware that the alignment of the operand needs to
2865 be checked separately.
2866 Valid addresses are single references or a sum of a reference and a
2867 constant integer. Return these parts in SYMREF and ADDEND. You can
2868 pass NULL in REF and/or ADDEND if you are not interested in these
2869 values. Literal pool references are *not* considered symbol
2870 references. */
2871
2872 static bool
2873 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
2874 {
2875 HOST_WIDE_INT tmpaddend = 0;
2876
2877 if (GET_CODE (addr) == CONST)
2878 addr = XEXP (addr, 0);
2879
2880 if (GET_CODE (addr) == PLUS)
2881 {
2882 if (!CONST_INT_P (XEXP (addr, 1)))
2883 return false;
2884
2885 tmpaddend = INTVAL (XEXP (addr, 1));
2886 addr = XEXP (addr, 0);
2887 }
2888
2889 if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
2890 || (GET_CODE (addr) == UNSPEC
2891 && (XINT (addr, 1) == UNSPEC_GOTENT
2892 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
2893 {
2894 if (symref)
2895 *symref = addr;
2896 if (addend)
2897 *addend = tmpaddend;
2898
2899 return true;
2900 }
2901 return false;
2902 }
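/* For instance, (const (plus (symbol_ref "foo") (const_int 8))) would be
   accepted with *SYMREF set to the SYMBOL_REF and *ADDEND to 8, whereas a
   SYMBOL_REF that is a literal pool address is rejected, as noted above.  */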
2903
2904 /* Return true if the address in OP is valid for constraint letter C
2905    if wrapped in a MEM rtx.  Set LIT_POOL_OK to true if literal
2906 pool MEMs should be accepted. Only the Q, R, S, T constraint
2907 letters are allowed for C. */
2908
2909 static int
2910 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
2911 {
2912 struct s390_address addr;
2913 bool decomposed = false;
2914
2915 /* This check makes sure that no symbolic address (except literal
2916 pool references) are accepted by the R or T constraints. */
2917 if (s390_loadrelative_operand_p (op, NULL, NULL))
2918 return 0;
2919
2920 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
2921 if (!lit_pool_ok)
2922 {
2923 if (!s390_decompose_address (op, &addr))
2924 return 0;
2925 if (addr.literal_pool)
2926 return 0;
2927 decomposed = true;
2928 }
2929
2930 switch (c)
2931 {
2932 case 'Q': /* no index short displacement */
2933 if (!decomposed && !s390_decompose_address (op, &addr))
2934 return 0;
2935 if (addr.indx)
2936 return 0;
2937 if (!s390_short_displacement (addr.disp))
2938 return 0;
2939 break;
2940
2941 case 'R': /* with index short displacement */
2942 if (TARGET_LONG_DISPLACEMENT)
2943 {
2944 if (!decomposed && !s390_decompose_address (op, &addr))
2945 return 0;
2946 if (!s390_short_displacement (addr.disp))
2947 return 0;
2948 }
2949 /* Any invalid address here will be fixed up by reload,
2950 so accept it for the most generic constraint. */
2951 break;
2952
2953 case 'S': /* no index long displacement */
2954 if (!TARGET_LONG_DISPLACEMENT)
2955 return 0;
2956 if (!decomposed && !s390_decompose_address (op, &addr))
2957 return 0;
2958 if (addr.indx)
2959 return 0;
2960 if (s390_short_displacement (addr.disp))
2961 return 0;
2962 break;
2963
2964 case 'T': /* with index long displacement */
2965 if (!TARGET_LONG_DISPLACEMENT)
2966 return 0;
2967 /* Any invalid address here will be fixed up by reload,
2968 so accept it for the most generic constraint. */
2969 if ((decomposed || s390_decompose_address (op, &addr))
2970 && s390_short_displacement (addr.disp))
2971 return 0;
2972 break;
2973 default:
2974 return 0;
2975 }
2976 return 1;
2977 }
2978
2979
2980 /* Evaluates constraint strings described by the regular expression
2981 ([A|B|Z](Q|R|S|T))|U|W|Y and returns 1 if OP is a valid operand for
2982    the constraint given in STR, and 0 otherwise. */
2983
2984 int
2985 s390_mem_constraint (const char *str, rtx op)
2986 {
2987 char c = str[0];
2988
2989 switch (c)
2990 {
2991 case 'A':
2992 /* Check for offsettable variants of memory constraints. */
2993 if (!MEM_P (op) || MEM_VOLATILE_P (op))
2994 return 0;
2995 if ((reload_completed || reload_in_progress)
2996 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
2997 return 0;
2998 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
2999 case 'B':
3000 /* Check for non-literal-pool variants of memory constraints. */
3001 if (!MEM_P (op))
3002 return 0;
3003 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3004 case 'Q':
3005 case 'R':
3006 case 'S':
3007 case 'T':
3008 if (GET_CODE (op) != MEM)
3009 return 0;
3010 return s390_check_qrst_address (c, XEXP (op, 0), true);
3011 case 'U':
3012 return (s390_check_qrst_address ('Q', op, true)
3013 || s390_check_qrst_address ('R', op, true));
3014 case 'W':
3015 return (s390_check_qrst_address ('S', op, true)
3016 || s390_check_qrst_address ('T', op, true));
3017 case 'Y':
3018 /* Simply check for the basic form of a shift count. Reload will
3019 take care of making sure we have a proper base register. */
3020 if (!s390_decompose_shift_count (op, NULL, NULL))
3021 return 0;
3022 break;
3023 case 'Z':
3024 return s390_check_qrst_address (str[1], op, true);
3025 default:
3026 return 0;
3027 }
3028 return 1;
3029 }
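/* As an example of the two-letter forms: a constraint such as "AQ" accepts
   a non-volatile, offsettable MEM whose address has no index register and a
   short displacement, while a "B"-prefixed variant such as "BR" would allow
   an index but reject literal pool addresses.  */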
3030
3031
3032 /* Evaluates constraint strings starting with letter O. Input
3033    parameter C is the letter following the "O" in the constraint
3034 string. Returns 1 if VALUE meets the respective constraint and 0
3035 otherwise. */
3036
3037 int
3038 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3039 {
3040 if (!TARGET_EXTIMM)
3041 return 0;
3042
3043 switch (c)
3044 {
3045 case 's':
3046 return trunc_int_for_mode (value, SImode) == value;
3047
3048 case 'p':
3049 return value == 0
3050 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3051
3052 case 'n':
3053 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3054
3055 default:
3056 gcc_unreachable ();
3057 }
3058 }
3059
3060
3061 /* Evaluates constraint strings starting with letter N. Parameter STR
3062 contains the letters following letter "N" in the constraint string.
3063 Returns true if VALUE matches the constraint. */
3064
3065 int
3066 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3067 {
3068 machine_mode mode, part_mode;
3069 int def;
3070 int part, part_goal;
3071
3072
3073 if (str[0] == 'x')
3074 part_goal = -1;
3075 else
3076 part_goal = str[0] - '0';
3077
3078 switch (str[1])
3079 {
3080 case 'Q':
3081 part_mode = QImode;
3082 break;
3083 case 'H':
3084 part_mode = HImode;
3085 break;
3086 case 'S':
3087 part_mode = SImode;
3088 break;
3089 default:
3090 return 0;
3091 }
3092
3093 switch (str[2])
3094 {
3095 case 'H':
3096 mode = HImode;
3097 break;
3098 case 'S':
3099 mode = SImode;
3100 break;
3101 case 'D':
3102 mode = DImode;
3103 break;
3104 default:
3105 return 0;
3106 }
3107
3108 switch (str[3])
3109 {
3110 case '0':
3111 def = 0;
3112 break;
3113 case 'F':
3114 def = -1;
3115 break;
3116 default:
3117 return 0;
3118 }
3119
3120 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3121 return 0;
3122
3123 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3124 if (part < 0)
3125 return 0;
3126 if (part_goal != -1 && part_goal != part)
3127 return 0;
3128
3129 return 1;
3130 }
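/* As an example of the encoding: for the constraint "N0HD0" (STR == "0HD0")
   VALUE must be a DImode constant whose only halfword differing from zero
   is part 0, i.e. the most significant 16 bits; 0x1234000000000000 would
   match, while 0x0000000012340000 would not (its non-zero halfword is
   part 2).  */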
3131
3132
3133 /* Returns true if the input parameter VALUE is a float zero. */
3134
3135 int
3136 s390_float_const_zero_p (rtx value)
3137 {
3138 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3139 && value == CONST0_RTX (GET_MODE (value)));
3140 }
3141
3142 /* Implement TARGET_REGISTER_MOVE_COST. */
3143
3144 static int
3145 s390_register_move_cost (machine_mode mode,
3146 reg_class_t from, reg_class_t to)
3147 {
3148 /* On s390, copy between fprs and gprs is expensive. */
3149
3150 /* It becomes somewhat faster having ldgr/lgdr. */
3151 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3152 {
3153 /* ldgr is single cycle. */
3154 if (reg_classes_intersect_p (from, GENERAL_REGS)
3155 && reg_classes_intersect_p (to, FP_REGS))
3156 return 1;
3157 /* lgdr needs 3 cycles. */
3158 if (reg_classes_intersect_p (to, GENERAL_REGS)
3159 && reg_classes_intersect_p (from, FP_REGS))
3160 return 3;
3161 }
3162
3163 /* Otherwise copying is done via memory. */
3164 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3165 && reg_classes_intersect_p (to, FP_REGS))
3166 || (reg_classes_intersect_p (from, FP_REGS)
3167 && reg_classes_intersect_p (to, GENERAL_REGS)))
3168 return 10;
3169
3170 return 1;
3171 }
3172
3173 /* Implement TARGET_MEMORY_MOVE_COST. */
3174
3175 static int
3176 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3177 reg_class_t rclass ATTRIBUTE_UNUSED,
3178 bool in ATTRIBUTE_UNUSED)
3179 {
3180 return 2;
3181 }
3182
3183 /* Compute a (partial) cost for rtx X. Return true if the complete
3184 cost has been computed, and false if subexpressions should be
3185 scanned. In either case, *TOTAL contains the cost result.
3186 CODE contains GET_CODE (x), OUTER_CODE contains the code
3187 of the superexpression of x. */
3188
3189 static bool
3190 s390_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
3191 int *total, bool speed ATTRIBUTE_UNUSED)
3192 {
3193 switch (code)
3194 {
3195 case CONST:
3196 case CONST_INT:
3197 case LABEL_REF:
3198 case SYMBOL_REF:
3199 case CONST_DOUBLE:
3200 case MEM:
3201 *total = 0;
3202 return true;
3203
3204 case ASHIFT:
3205 case ASHIFTRT:
3206 case LSHIFTRT:
3207 case ROTATE:
3208 case ROTATERT:
3209 case AND:
3210 case IOR:
3211 case XOR:
3212 case NEG:
3213 case NOT:
3214 *total = COSTS_N_INSNS (1);
3215 return false;
3216
3217 case PLUS:
3218 case MINUS:
3219 *total = COSTS_N_INSNS (1);
3220 return false;
3221
3222 case MULT:
3223 switch (GET_MODE (x))
3224 {
3225 case SImode:
3226 {
3227 rtx left = XEXP (x, 0);
3228 rtx right = XEXP (x, 1);
3229 if (GET_CODE (right) == CONST_INT
3230 && CONST_OK_FOR_K (INTVAL (right)))
3231 *total = s390_cost->mhi;
3232 else if (GET_CODE (left) == SIGN_EXTEND)
3233 *total = s390_cost->mh;
3234 else
3235 *total = s390_cost->ms; /* msr, ms, msy */
3236 break;
3237 }
3238 case DImode:
3239 {
3240 rtx left = XEXP (x, 0);
3241 rtx right = XEXP (x, 1);
3242 if (TARGET_ZARCH)
3243 {
3244 if (GET_CODE (right) == CONST_INT
3245 && CONST_OK_FOR_K (INTVAL (right)))
3246 *total = s390_cost->mghi;
3247 else if (GET_CODE (left) == SIGN_EXTEND)
3248 *total = s390_cost->msgf;
3249 else
3250 *total = s390_cost->msg; /* msgr, msg */
3251 }
3252 else /* TARGET_31BIT */
3253 {
3254 if (GET_CODE (left) == SIGN_EXTEND
3255 && GET_CODE (right) == SIGN_EXTEND)
3256 /* mulsidi case: mr, m */
3257 *total = s390_cost->m;
3258 else if (GET_CODE (left) == ZERO_EXTEND
3259 && GET_CODE (right) == ZERO_EXTEND
3260 && TARGET_CPU_ZARCH)
3261 /* umulsidi case: ml, mlr */
3262 *total = s390_cost->ml;
3263 else
3264 /* Complex calculation is required. */
3265 *total = COSTS_N_INSNS (40);
3266 }
3267 break;
3268 }
3269 case SFmode:
3270 case DFmode:
3271 *total = s390_cost->mult_df;
3272 break;
3273 case TFmode:
3274 *total = s390_cost->mxbr;
3275 break;
3276 default:
3277 return false;
3278 }
3279 return false;
3280
3281 case FMA:
3282 switch (GET_MODE (x))
3283 {
3284 case DFmode:
3285 *total = s390_cost->madbr;
3286 break;
3287 case SFmode:
3288 *total = s390_cost->maebr;
3289 break;
3290 default:
3291 return false;
3292 }
3293       /* Negation of the third argument is free: FMSUB. */
3294 if (GET_CODE (XEXP (x, 2)) == NEG)
3295 {
3296 *total += (rtx_cost (XEXP (x, 0), FMA, 0, speed)
3297 + rtx_cost (XEXP (x, 1), FMA, 1, speed)
3298 + rtx_cost (XEXP (XEXP (x, 2), 0), FMA, 2, speed));
3299 return true;
3300 }
3301 return false;
3302
3303 case UDIV:
3304 case UMOD:
3305 if (GET_MODE (x) == TImode) /* 128 bit division */
3306 *total = s390_cost->dlgr;
3307 else if (GET_MODE (x) == DImode)
3308 {
3309 rtx right = XEXP (x, 1);
3310 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3311 *total = s390_cost->dlr;
3312 else /* 64 by 64 bit division */
3313 *total = s390_cost->dlgr;
3314 }
3315 else if (GET_MODE (x) == SImode) /* 32 bit division */
3316 *total = s390_cost->dlr;
3317 return false;
3318
3319 case DIV:
3320 case MOD:
3321 if (GET_MODE (x) == DImode)
3322 {
3323 rtx right = XEXP (x, 1);
3324 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3325 if (TARGET_ZARCH)
3326 *total = s390_cost->dsgfr;
3327 else
3328 *total = s390_cost->dr;
3329 else /* 64 by 64 bit division */
3330 *total = s390_cost->dsgr;
3331 }
3332 else if (GET_MODE (x) == SImode) /* 32 bit division */
3333 *total = s390_cost->dlr;
3334 else if (GET_MODE (x) == SFmode)
3335 {
3336 *total = s390_cost->debr;
3337 }
3338 else if (GET_MODE (x) == DFmode)
3339 {
3340 *total = s390_cost->ddbr;
3341 }
3342 else if (GET_MODE (x) == TFmode)
3343 {
3344 *total = s390_cost->dxbr;
3345 }
3346 return false;
3347
3348 case SQRT:
3349 if (GET_MODE (x) == SFmode)
3350 *total = s390_cost->sqebr;
3351 else if (GET_MODE (x) == DFmode)
3352 *total = s390_cost->sqdbr;
3353 else /* TFmode */
3354 *total = s390_cost->sqxbr;
3355 return false;
3356
3357 case SIGN_EXTEND:
3358 case ZERO_EXTEND:
3359 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3360 || outer_code == PLUS || outer_code == MINUS
3361 || outer_code == COMPARE)
3362 *total = 0;
3363 return false;
3364
3365 case COMPARE:
3366 *total = COSTS_N_INSNS (1);
3367 if (GET_CODE (XEXP (x, 0)) == AND
3368 && GET_CODE (XEXP (x, 1)) == CONST_INT
3369 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3370 {
3371 rtx op0 = XEXP (XEXP (x, 0), 0);
3372 rtx op1 = XEXP (XEXP (x, 0), 1);
3373 rtx op2 = XEXP (x, 1);
3374
3375 if (memory_operand (op0, GET_MODE (op0))
3376 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3377 return true;
3378 if (register_operand (op0, GET_MODE (op0))
3379 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3380 return true;
3381 }
3382 return false;
3383
3384 default:
3385 return false;
3386 }
3387 }
3388
3389 /* Return the cost of an address rtx ADDR. */
3390
3391 static int
3392 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3393 addr_space_t as ATTRIBUTE_UNUSED,
3394 bool speed ATTRIBUTE_UNUSED)
3395 {
3396 struct s390_address ad;
3397 if (!s390_decompose_address (addr, &ad))
3398 return 1000;
3399
3400 return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3401 }
3402
3403 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3404 otherwise return 0. */
3405
3406 int
3407 tls_symbolic_operand (rtx op)
3408 {
3409 if (GET_CODE (op) != SYMBOL_REF)
3410 return 0;
3411 return SYMBOL_REF_TLS_MODEL (op);
3412 }
3413 \f
3414 /* Split DImode access register reference REG (on 64-bit) into its constituent
3415 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3416 gen_highpart cannot be used as they assume all registers are word-sized,
3417 while our access registers have only half that size. */
3418
3419 void
3420 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3421 {
3422 gcc_assert (TARGET_64BIT);
3423 gcc_assert (ACCESS_REG_P (reg));
3424 gcc_assert (GET_MODE (reg) == DImode);
3425 gcc_assert (!(REGNO (reg) & 1));
3426
3427 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3428 *hi = gen_rtx_REG (SImode, REGNO (reg));
3429 }
3430
3431 /* Return true if OP contains a symbol reference */
3432
3433 bool
3434 symbolic_reference_mentioned_p (rtx op)
3435 {
3436 const char *fmt;
3437 int i;
3438
3439 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3440 return 1;
3441
3442 fmt = GET_RTX_FORMAT (GET_CODE (op));
3443 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3444 {
3445 if (fmt[i] == 'E')
3446 {
3447 int j;
3448
3449 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3450 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3451 return 1;
3452 }
3453
3454 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3455 return 1;
3456 }
3457
3458 return 0;
3459 }
3460
3461 /* Return true if OP contains a reference to a thread-local symbol. */
3462
3463 bool
3464 tls_symbolic_reference_mentioned_p (rtx op)
3465 {
3466 const char *fmt;
3467 int i;
3468
3469 if (GET_CODE (op) == SYMBOL_REF)
3470 return tls_symbolic_operand (op);
3471
3472 fmt = GET_RTX_FORMAT (GET_CODE (op));
3473 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3474 {
3475 if (fmt[i] == 'E')
3476 {
3477 int j;
3478
3479 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3480 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3481 return true;
3482 }
3483
3484 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3485 return true;
3486 }
3487
3488 return false;
3489 }
3490
3491
3492 /* Return true if OP is a legitimate general operand when
3493 generating PIC code. It is given that flag_pic is on
3494 and that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
3495
3496 int
3497 legitimate_pic_operand_p (rtx op)
3498 {
3499 /* Accept all non-symbolic constants. */
3500 if (!SYMBOLIC_CONST (op))
3501 return 1;
3502
3503 /* Reject everything else; must be handled
3504 via emit_symbolic_move. */
3505 return 0;
3506 }
3507
3508 /* Returns true if the constant value OP is a legitimate general operand.
3509 It is given that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
3510
3511 static bool
3512 s390_legitimate_constant_p (machine_mode mode, rtx op)
3513 {
3514 if (VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3515 {
3516 if (GET_MODE_SIZE (mode) != 16)
3517 return 0;
3518
3519 if (!const0_operand (op, mode)
3520 && !s390_contiguous_bitmask_vector_p (op, NULL, NULL)
3521 && !s390_bytemask_vector_p (op, NULL))
3522 return 0;
3523 }
3524
3525 /* Accept all non-symbolic constants. */
3526 if (!SYMBOLIC_CONST (op))
3527 return 1;
3528
3529 /* Accept immediate LARL operands. */
3530 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
3531 return 1;
3532
3533 /* Thread-local symbols are never legal constants. This is
3534 so that emit_call knows that computing such addresses
3535 might require a function call. */
3536 if (TLS_SYMBOLIC_CONST (op))
3537 return 0;
3538
3539 /* In the PIC case, symbolic constants must *not* be
3540 forced into the literal pool. We accept them here,
3541 so that they will be handled by emit_symbolic_move. */
3542 if (flag_pic)
3543 return 1;
3544
3545 /* All remaining non-PIC symbolic constants are
3546 forced into the literal pool. */
3547 return 0;
3548 }
3549
3550 /* Determine if it's legal to put X into the constant pool. This
3551 is not possible if X contains the address of a symbol that is
3552 not constant (TLS) or not known at final link time (PIC). */
3553
3554 static bool
3555 s390_cannot_force_const_mem (machine_mode mode, rtx x)
3556 {
3557 switch (GET_CODE (x))
3558 {
3559 case CONST_INT:
3560 case CONST_DOUBLE:
3561 case CONST_VECTOR:
3562 /* Accept all non-symbolic constants. */
3563 return false;
3564
3565 case LABEL_REF:
3566 /* Labels are OK iff we are non-PIC. */
3567 return flag_pic != 0;
3568
3569 case SYMBOL_REF:
3570 /* 'Naked' TLS symbol references are never OK,
3571 non-TLS symbols are OK iff we are non-PIC. */
3572 if (tls_symbolic_operand (x))
3573 return true;
3574 else
3575 return flag_pic != 0;
3576
3577 case CONST:
3578 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
3579 case PLUS:
3580 case MINUS:
3581 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
3582 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
3583
3584 case UNSPEC:
3585 switch (XINT (x, 1))
3586 {
3587 /* Only lt-relative or GOT-relative UNSPECs are OK. */
3588 case UNSPEC_LTREL_OFFSET:
3589 case UNSPEC_GOT:
3590 case UNSPEC_GOTOFF:
3591 case UNSPEC_PLTOFF:
3592 case UNSPEC_TLSGD:
3593 case UNSPEC_TLSLDM:
3594 case UNSPEC_NTPOFF:
3595 case UNSPEC_DTPOFF:
3596 case UNSPEC_GOTNTPOFF:
3597 case UNSPEC_INDNTPOFF:
3598 return false;
3599
3600 	/* If the literal pool shares the code section, execute
3601 	   template placeholders may be put into the pool as well. */
3602 case UNSPEC_INSN:
3603 return TARGET_CPU_ZARCH;
3604
3605 default:
3606 return true;
3607 }
3608 break;
3609
3610 default:
3611 gcc_unreachable ();
3612 }
3613 }
3614
3615 /* Returns true if the constant value OP is a legitimate general
3616 operand during and after reload. The difference to
3617 legitimate_constant_p is that this function will not accept
3618 a constant that would need to be forced to the literal pool
3619 before it can be used as operand.
3620 This function accepts all constants which can be loaded directly
3621 into a GPR. */
3622
3623 bool
3624 legitimate_reload_constant_p (rtx op)
3625 {
3626 /* Accept la(y) operands. */
3627 if (GET_CODE (op) == CONST_INT
3628 && DISP_IN_RANGE (INTVAL (op)))
3629 return true;
3630
3631 /* Accept l(g)hi/l(g)fi operands. */
3632 if (GET_CODE (op) == CONST_INT
3633 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
3634 return true;
3635
3636 /* Accept lliXX operands. */
3637 if (TARGET_ZARCH
3638 && GET_CODE (op) == CONST_INT
3639 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3640 && s390_single_part (op, word_mode, HImode, 0) >= 0)
3641 return true;
3642
3643 if (TARGET_EXTIMM
3644 && GET_CODE (op) == CONST_INT
3645 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3646 && s390_single_part (op, word_mode, SImode, 0) >= 0)
3647 return true;
3648
3649 /* Accept larl operands. */
3650 if (TARGET_CPU_ZARCH
3651 && larl_operand (op, VOIDmode))
3652 return true;
3653
3654 /* Accept floating-point zero operands that fit into a single GPR. */
3655 if (GET_CODE (op) == CONST_DOUBLE
3656 && s390_float_const_zero_p (op)
3657 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
3658 return true;
3659
3660 /* Accept double-word operands that can be split. */
3661 if (GET_CODE (op) == CONST_INT
3662 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op))
3663 {
3664 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
3665 rtx hi = operand_subword (op, 0, 0, dword_mode);
3666 rtx lo = operand_subword (op, 1, 0, dword_mode);
3667 return legitimate_reload_constant_p (hi)
3668 && legitimate_reload_constant_p (lo);
3669 }
3670
3671 /* Everything else cannot be handled without reload. */
3672 return false;
3673 }
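/* For instance, a constant such as 0x0000123400000000 has exactly one
   non-zero halfword and is accepted through the lliXX test above (given
   TARGET_ZARCH), and 0x1234567800000000 has a single non-zero SImode part
   and passes the TARGET_EXTIMM test; neither needs the literal pool.  */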
3674
3675 /* Returns true if the constant value OP is a legitimate fp operand
3676 during and after reload.
3677 This function accepts all constants which can be loaded directly
3678 into an FPR. */
3679
3680 static bool
3681 legitimate_reload_fp_constant_p (rtx op)
3682 {
3683 /* Accept floating-point zero operands if the load zero instruction
3684 can be used. Prior to z196 the load fp zero instruction caused a
3685 performance penalty if the result is used as BFP number. */
3686 if (TARGET_Z196
3687 && GET_CODE (op) == CONST_DOUBLE
3688 && s390_float_const_zero_p (op))
3689 return true;
3690
3691 return false;
3692 }
3693
3694 /* Returns true if the constant value OP is a legitimate vector operand
3695 during and after reload.
3696 This function accepts all constants which can be loaded directly
3697    into a VR. */
3698
3699 static bool
3700 legitimate_reload_vector_constant_p (rtx op)
3701 {
3702 /* FIXME: Support constant vectors with all the same 16 bit unsigned
3703 operands. These can be loaded with vrepi. */
3704
3705 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
3706 && (const0_operand (op, GET_MODE (op))
3707 || constm1_operand (op, GET_MODE (op))
3708 || s390_contiguous_bitmask_vector_p (op, NULL, NULL)
3709 || s390_bytemask_vector_p (op, NULL)))
3710 return true;
3711
3712 return false;
3713 }
3714
3715 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
3716 return the class of reg to actually use. */
3717
3718 static reg_class_t
3719 s390_preferred_reload_class (rtx op, reg_class_t rclass)
3720 {
3721 switch (GET_CODE (op))
3722 {
3723 /* Constants we cannot reload into general registers
3724 must be forced into the literal pool. */
3725 case CONST_VECTOR:
3726 case CONST_DOUBLE:
3727 case CONST_INT:
3728 if (reg_class_subset_p (GENERAL_REGS, rclass)
3729 && legitimate_reload_constant_p (op))
3730 return GENERAL_REGS;
3731 else if (reg_class_subset_p (ADDR_REGS, rclass)
3732 && legitimate_reload_constant_p (op))
3733 return ADDR_REGS;
3734 else if (reg_class_subset_p (FP_REGS, rclass)
3735 && legitimate_reload_fp_constant_p (op))
3736 return FP_REGS;
3737 else if (reg_class_subset_p (VEC_REGS, rclass)
3738 && legitimate_reload_vector_constant_p (op))
3739 return VEC_REGS;
3740
3741 return NO_REGS;
3742
3743 /* If a symbolic constant or a PLUS is reloaded,
3744 it is most likely being used as an address, so
3745 prefer ADDR_REGS. If 'class' is not a superset
3746 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
3747 case CONST:
3748 /* Symrefs cannot be pushed into the literal pool with -fPIC
3749 so we *MUST NOT* return NO_REGS for these cases
3750 (s390_cannot_force_const_mem will return true).
3751
3752 On the other hand we MUST return NO_REGS for symrefs with
3753 invalid addend which might have been pushed to the literal
3754 pool (no -fPIC). Usually we would expect them to be
3755 handled via secondary reload but this does not happen if
3756 they are used as literal pool slot replacement in reload
3757 inheritance (see emit_input_reload_insns). */
3758 if (TARGET_CPU_ZARCH
3759 && GET_CODE (XEXP (op, 0)) == PLUS
3760 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
3761 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
3762 {
3763 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
3764 return ADDR_REGS;
3765 else
3766 return NO_REGS;
3767 }
3768 /* fallthrough */
3769 case LABEL_REF:
3770 case SYMBOL_REF:
3771 if (!legitimate_reload_constant_p (op))
3772 return NO_REGS;
3773 /* fallthrough */
3774 case PLUS:
3775 /* load address will be used. */
3776 if (reg_class_subset_p (ADDR_REGS, rclass))
3777 return ADDR_REGS;
3778 else
3779 return NO_REGS;
3780
3781 default:
3782 break;
3783 }
3784
3785 return rclass;
3786 }
3787
3788 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
3789 multiple of ALIGNMENT and the SYMBOL_REF being naturally
3790 aligned. */
3791
3792 bool
3793 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
3794 {
3795 HOST_WIDE_INT addend;
3796 rtx symref;
3797
3798 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
3799 return false;
3800
3801 if (addend & (alignment - 1))
3802 return false;
3803
3804 if (GET_CODE (symref) == SYMBOL_REF
3805 && !SYMBOL_REF_NOT_NATURALLY_ALIGNED_P (symref))
3806 return true;
3807
3808 if (GET_CODE (symref) == UNSPEC
3809 && alignment <= UNITS_PER_LONG)
3810 return true;
3811
3812 return false;
3813 }
3814
3815 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
3816    operand, SCRATCH is used to load the even part of the address,
3817    and one is then added. */
3818
3819 void
3820 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
3821 {
3822 HOST_WIDE_INT addend;
3823 rtx symref;
3824
3825 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
3826 gcc_unreachable ();
3827
3828 if (!(addend & 1))
3829 /* Easy case. The addend is even so larl will do fine. */
3830 emit_move_insn (reg, addr);
3831 else
3832 {
3833 /* We can leave the scratch register untouched if the target
3834 register is a valid base register. */
3835 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
3836 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
3837 scratch = reg;
3838
3839 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
3840 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
3841
3842 if (addend != 1)
3843 emit_move_insn (scratch,
3844 gen_rtx_CONST (Pmode,
3845 gen_rtx_PLUS (Pmode, symref,
3846 GEN_INT (addend - 1))));
3847 else
3848 emit_move_insn (scratch, symref);
3849
3850 /* Increment the address using la in order to avoid clobbering cc. */
3851 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
3852 }
3853 }
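/* For instance, for ADDR == (const (plus (symbol_ref "foo") (const_int 5)))
   the addend is odd, so SCRATCH (or REG itself, if it is a valid base
   register) is first loaded with foo + 4 and the final address is then
   formed with la, avoiding a clobber of the condition code.  */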
3854
3855 /* Generate what is necessary to move between REG and MEM using
3856 SCRATCH. The direction is given by TOMEM. */
3857
3858 void
3859 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
3860 {
3861 /* Reload might have pulled a constant out of the literal pool.
3862 Force it back in. */
3863 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
3864 || GET_CODE (mem) == CONST_VECTOR
3865 || GET_CODE (mem) == CONST)
3866 mem = force_const_mem (GET_MODE (reg), mem);
3867
3868 gcc_assert (MEM_P (mem));
3869
3870 /* For a load from memory we can leave the scratch register
3871 untouched if the target register is a valid base register. */
3872 if (!tomem
3873 && REGNO (reg) < FIRST_PSEUDO_REGISTER
3874 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
3875 && GET_MODE (reg) == GET_MODE (scratch))
3876 scratch = reg;
3877
3878 /* Load address into scratch register. Since we can't have a
3879 secondary reload for a secondary reload we have to cover the case
3880 where larl would need a secondary reload here as well. */
3881 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
3882
3883 /* Now we can use a standard load/store to do the move. */
3884 if (tomem)
3885 emit_move_insn (replace_equiv_address (mem, scratch), reg);
3886 else
3887 emit_move_insn (reg, replace_equiv_address (mem, scratch));
3888 }
3889
3890 /* Inform reload about cases where moving X with a mode MODE to a register in
3891 RCLASS requires an extra scratch or immediate register. Return the class
3892 needed for the immediate register. */
3893
3894 static reg_class_t
3895 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
3896 machine_mode mode, secondary_reload_info *sri)
3897 {
3898 enum reg_class rclass = (enum reg_class) rclass_i;
3899
3900 /* Intermediate register needed. */
3901 if (reg_classes_intersect_p (CC_REGS, rclass))
3902 return GENERAL_REGS;
3903
3904 if (TARGET_VX)
3905 {
3906 /* The vst/vl vector move instructions allow only for short
3907 displacements. */
3908 if (MEM_P (x)
3909 && GET_CODE (XEXP (x, 0)) == PLUS
3910 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3911 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
3912 && reg_class_subset_p (rclass, VEC_REGS)
3913 && (!reg_class_subset_p (rclass, FP_REGS)
3914 || (GET_MODE_SIZE (mode) > 8
3915 && s390_class_max_nregs (FP_REGS, mode) == 1)))
3916 {
3917 if (in_p)
3918 sri->icode = (TARGET_64BIT ?
3919 CODE_FOR_reloaddi_la_in :
3920 CODE_FOR_reloadsi_la_in);
3921 else
3922 sri->icode = (TARGET_64BIT ?
3923 CODE_FOR_reloaddi_la_out :
3924 CODE_FOR_reloadsi_la_out);
3925 }
3926 }
3927
3928 if (TARGET_Z10)
3929 {
3930 HOST_WIDE_INT offset;
3931 rtx symref;
3932
3933 /* On z10 several optimizer steps may generate larl operands with
3934 an odd addend. */
3935 if (in_p
3936 && s390_loadrelative_operand_p (x, &symref, &offset)
3937 && mode == Pmode
3938 && !SYMBOL_REF_ALIGN1_P (symref)
3939 && (offset & 1) == 1)
3940 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
3941 : CODE_FOR_reloadsi_larl_odd_addend_z10);
3942
3943 /* Handle all the (mem (symref)) accesses we cannot use the z10
3944 instructions for. */
3945 if (MEM_P (x)
3946 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
3947 && (mode == QImode
3948 || !reg_classes_intersect_p (GENERAL_REGS, rclass)
3949 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
3950 || !s390_check_symref_alignment (XEXP (x, 0),
3951 GET_MODE_SIZE (mode))))
3952 {
3953 #define __SECONDARY_RELOAD_CASE(M,m) \
3954 case M##mode: \
3955 if (TARGET_64BIT) \
3956 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
3957 CODE_FOR_reload##m##di_tomem_z10; \
3958 else \
3959 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
3960 CODE_FOR_reload##m##si_tomem_z10; \
3961 break;
3962
3963 switch (GET_MODE (x))
3964 {
3965 __SECONDARY_RELOAD_CASE (QI, qi);
3966 __SECONDARY_RELOAD_CASE (HI, hi);
3967 __SECONDARY_RELOAD_CASE (SI, si);
3968 __SECONDARY_RELOAD_CASE (DI, di);
3969 __SECONDARY_RELOAD_CASE (TI, ti);
3970 __SECONDARY_RELOAD_CASE (SF, sf);
3971 __SECONDARY_RELOAD_CASE (DF, df);
3972 __SECONDARY_RELOAD_CASE (TF, tf);
3973 __SECONDARY_RELOAD_CASE (SD, sd);
3974 __SECONDARY_RELOAD_CASE (DD, dd);
3975 __SECONDARY_RELOAD_CASE (TD, td);
3976 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
3977 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
3978 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
3979 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
3980 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
3981 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
3982 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
3983 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
3984 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
3985 __SECONDARY_RELOAD_CASE (V1SI, v1si);
3986 __SECONDARY_RELOAD_CASE (V2SI, v2si);
3987 __SECONDARY_RELOAD_CASE (V4SI, v4si);
3988 __SECONDARY_RELOAD_CASE (V1DI, v1di);
3989 __SECONDARY_RELOAD_CASE (V2DI, v2di);
3990 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
3991 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
3992 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
3993 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
3994 __SECONDARY_RELOAD_CASE (V1DF, v1df);
3995 __SECONDARY_RELOAD_CASE (V2DF, v2df);
3996 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
3997 default:
3998 gcc_unreachable ();
3999 }
4000 #undef __SECONDARY_RELOAD_CASE
4001 }
4002 }
4003
4004 /* We need a scratch register when loading a PLUS expression which
4005 is not a legitimate operand of the LOAD ADDRESS instruction. */
4006 /* LRA can deal with the transformation of a PLUS operand very well -- so we
4007 don't need to prompt LRA in this case. */
4008 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4009 sri->icode = (TARGET_64BIT ?
4010 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4011
4012 /* When performing a multiword move from or to memory we have to make sure the
4013 second chunk in memory is addressable without causing a displacement
4014 overflow. If it is not, we calculate the address in
4015 a scratch register. */
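/* E.g. without the long-displacement facility an 8-byte access at
   displacement 4092 would place its second word at 4096, which no
   longer fits the 12-bit displacement field.  */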
4016 if (MEM_P (x)
4017 && GET_CODE (XEXP (x, 0)) == PLUS
4018 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4019 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4020 + GET_MODE_SIZE (mode) - 1))
4021 {
4022 /* For GENERAL_REGS a displacement overflow is no problem if it occurs
4023 in an s_operand address, since we may fall back to lm/stm. So we only
4024 have to care about overflows in the b+i+d case. */
4025 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4026 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4027 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4028 /* For FP_REGS no lm/stm is available so this check is triggered
4029 for displacement overflows in b+i+d and b+d like addresses. */
4030 || (reg_classes_intersect_p (FP_REGS, rclass)
4031 && s390_class_max_nregs (FP_REGS, mode) > 1))
4032 {
4033 if (in_p)
4034 sri->icode = (TARGET_64BIT ?
4035 CODE_FOR_reloaddi_la_in :
4036 CODE_FOR_reloadsi_la_in);
4037 else
4038 sri->icode = (TARGET_64BIT ?
4039 CODE_FOR_reloaddi_la_out :
4040 CODE_FOR_reloadsi_la_out);
4041 }
4042 }
4043
4044 /* A scratch address register is needed when a symbolic constant is
4045 copied to r0 when compiling with -fPIC. In other cases the target
4046 register might be used as a temporary (see legitimize_pic_address). */
4047 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4048 sri->icode = (TARGET_64BIT ?
4049 CODE_FOR_reloaddi_PIC_addr :
4050 CODE_FOR_reloadsi_PIC_addr);
4051
4052 /* Either scratch or no register needed. */
4053 return NO_REGS;
4054 }
4055
4056 /* Generate code to load SRC, which is a PLUS that is not a
4057 legitimate operand for the LA instruction, into TARGET.
4058 SCRATCH may be used as scratch register. */
4059
4060 void
4061 s390_expand_plus_operand (rtx target, rtx src,
4062 rtx scratch)
4063 {
4064 rtx sum1, sum2;
4065 struct s390_address ad;
4066
4067 /* src must be a PLUS; get its two operands. */
4068 gcc_assert (GET_CODE (src) == PLUS);
4069 gcc_assert (GET_MODE (src) == Pmode);
4070
4071 /* Check if any of the two operands is already scheduled
4072 for replacement by reload. This can happen e.g. when
4073 float registers occur in an address. */
4074 sum1 = find_replacement (&XEXP (src, 0));
4075 sum2 = find_replacement (&XEXP (src, 1));
4076 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4077
4078 /* If the address is already strictly valid, there's nothing to do. */
4079 if (!s390_decompose_address (src, &ad)
4080 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4081 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4082 {
4083 /* Otherwise, one of the operands cannot be an address register;
4084 we reload its value into the scratch register. */
4085 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4086 {
4087 emit_move_insn (scratch, sum1);
4088 sum1 = scratch;
4089 }
4090 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4091 {
4092 emit_move_insn (scratch, sum2);
4093 sum2 = scratch;
4094 }
4095
4096 /* According to the way these invalid addresses are generated
4097 in reload.c, it should never happen (at least on s390) that
4098 *neither* of the PLUS components, after find_replacements
4099 was applied, is an address register. */
4100 if (sum1 == scratch && sum2 == scratch)
4101 {
4102 debug_rtx (src);
4103 gcc_unreachable ();
4104 }
4105
4106 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4107 }
4108
4109 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4110 is only ever performed on addresses, so we can mark the
4111 sum as legitimate for LA in any case. */
4112 s390_load_address (target, src);
4113 }
4114
4115
4116 /* Return true if ADDR is a valid memory address.
4117 STRICT specifies whether strict register checking applies. */
4118
4119 static bool
4120 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4121 {
4122 struct s390_address ad;
4123
4124 if (TARGET_Z10
4125 && larl_operand (addr, VOIDmode)
4126 && (mode == VOIDmode
4127 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4128 return true;
4129
4130 if (!s390_decompose_address (addr, &ad))
4131 return false;
4132
4133 if (strict)
4134 {
4135 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4136 return false;
4137
4138 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4139 return false;
4140 }
4141 else
4142 {
4143 if (ad.base
4144 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4145 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4146 return false;
4147
4148 if (ad.indx
4149 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4150 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4151 return false;
4152 }
4153 return true;
4154 }
4155
4156 /* Return true if OP is a valid operand for the LA instruction.
4157 In 31-bit, we need to prove that the result is used as an
4158 address, as LA performs only a 31-bit addition. */
4159
4160 bool
4161 legitimate_la_operand_p (rtx op)
4162 {
4163 struct s390_address addr;
4164 if (!s390_decompose_address (op, &addr))
4165 return false;
4166
4167 return (TARGET_64BIT || addr.pointer);
4168 }
4169
4170 /* Return true if it is valid *and* preferable to use LA to
4171 compute the sum of OP1 and OP2. */
4172
4173 bool
4174 preferred_la_operand_p (rtx op1, rtx op2)
4175 {
4176 struct s390_address addr;
4177
4178 if (op2 != const0_rtx)
4179 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4180
4181 if (!s390_decompose_address (op1, &addr))
4182 return false;
4183 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4184 return false;
4185 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4186 return false;
4187
4188 /* Avoid LA instructions with index register on z196; it is
4189 preferable to use regular add instructions when possible.
4190 Starting with zEC12 the la with index register is "uncracked"
4191 again. */
4192 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
4193 return false;
4194
4195 if (!TARGET_64BIT && !addr.pointer)
4196 return false;
4197
4198 if (addr.pointer)
4199 return true;
4200
4201 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4202 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4203 return true;
4204
4205 return false;
4206 }
4207
4208 /* Emit a forced load-address operation to load SRC into DST.
4209 This will use the LOAD ADDRESS instruction even in situations
4210 where legitimate_la_operand_p (SRC) returns false. */
4211
4212 void
4213 s390_load_address (rtx dst, rtx src)
4214 {
4215 if (TARGET_64BIT)
4216 emit_move_insn (dst, src);
4217 else
4218 emit_insn (gen_force_la_31 (dst, src));
4219 }
4220
4221 /* Return a legitimate reference for ORIG (an address) using the
4222 register REG. If REG is 0, a new pseudo is generated.
4223
4224 There are two types of references that must be handled:
4225
4226 1. Global data references must load the address from the GOT, via
4227 the PIC reg. An insn is emitted to do this load, and the reg is
4228 returned.
4229
4230 2. Static data references, constant pool addresses, and code labels
4231 compute the address as an offset from the GOT, whose base is in
4232 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4233 differentiate them from global data objects. The returned
4234 address is the PIC reg + an unspec constant.
4235
4236 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4237 reg also appears in the address. */
4238
4239 rtx
4240 legitimize_pic_address (rtx orig, rtx reg)
4241 {
4242 rtx addr = orig;
4243 rtx addend = const0_rtx;
4244 rtx new_rtx = orig;
4245
4246 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4247
4248 if (GET_CODE (addr) == CONST)
4249 addr = XEXP (addr, 0);
4250
4251 if (GET_CODE (addr) == PLUS)
4252 {
4253 addend = XEXP (addr, 1);
4254 addr = XEXP (addr, 0);
4255 }
4256
4257 if ((GET_CODE (addr) == LABEL_REF
4258 || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr))
4259 || (GET_CODE (addr) == UNSPEC &&
4260 (XINT (addr, 1) == UNSPEC_GOTENT
4261 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
4262 && GET_CODE (addend) == CONST_INT)
4263 {
4264 /* This can be locally addressed. */
4265
4266 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4267 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4268 gen_rtx_CONST (Pmode, addr) : addr);
4269
4270 if (TARGET_CPU_ZARCH
4271 && larl_operand (const_addr, VOIDmode)
4272 && INTVAL (addend) < (HOST_WIDE_INT)1 << 31
4273 && INTVAL (addend) >= -((HOST_WIDE_INT)1 << 31))
4274 {
4275 if (INTVAL (addend) & 1)
4276 {
4277 /* LARL can't handle odd offsets, so emit a pair of LARL
4278 and LA. */
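/* E.g. sym + 5 becomes roughly "larl <temp>,sym" followed by
   "la <reg>,5(<temp>)"; if the odd addend does not fit the
   displacement field, the even part sym + (addend - 1) goes into
   the LARL and the LA just adds 1.  */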
4279 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4280
4281 if (!DISP_IN_RANGE (INTVAL (addend)))
4282 {
4283 HOST_WIDE_INT even = INTVAL (addend) - 1;
4284 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4285 addr = gen_rtx_CONST (Pmode, addr);
4286 addend = const1_rtx;
4287 }
4288
4289 emit_move_insn (temp, addr);
4290 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4291
4292 if (reg != 0)
4293 {
4294 s390_load_address (reg, new_rtx);
4295 new_rtx = reg;
4296 }
4297 }
4298 else
4299 {
4300 /* If the offset is even, we can just use LARL. This
4301 will happen automatically. */
4302 }
4303 }
4304 else
4305 {
4306 /* No larl - Access local symbols relative to the GOT. */
4307
4308 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4309
4310 if (reload_in_progress || reload_completed)
4311 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4312
4313 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4314 if (addend != const0_rtx)
4315 addr = gen_rtx_PLUS (Pmode, addr, addend);
4316 addr = gen_rtx_CONST (Pmode, addr);
4317 addr = force_const_mem (Pmode, addr);
4318 emit_move_insn (temp, addr);
4319
4320 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4321 if (reg != 0)
4322 {
4323 s390_load_address (reg, new_rtx);
4324 new_rtx = reg;
4325 }
4326 }
4327 }
4328 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4329 {
4330 /* A non-local symbol reference without addend.
4331
4332 The symbol ref is wrapped into an UNSPEC to make sure the
4333 proper operand modifier (@GOT or @GOTENT) will be emitted.
4334 This will tell the linker to put the symbol into the GOT.
4335
4336 Additionally the code dereferencing the GOT slot is emitted here.
4337
4338 An addend to the symref needs to be added afterwards.
4339 legitimize_pic_address calls itself recursively to handle
4340 that case. So no need to do it here. */
4341
4342 if (reg == 0)
4343 reg = gen_reg_rtx (Pmode);
4344
4345 if (TARGET_Z10)
4346 {
4347 /* Use load relative if possible.
4348 lgrl <target>, sym@GOTENT */
4349 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4350 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4351 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4352
4353 emit_move_insn (reg, new_rtx);
4354 new_rtx = reg;
4355 }
4356 else if (flag_pic == 1)
4357 {
4358 /* Assume GOT offset is a valid displacement operand (< 4k
4359 or < 512k with z990). This is handled the same way in
4360 both 31- and 64-bit code (@GOT).
4361 lg <target>, sym@GOT(r12) */
4362
4363 if (reload_in_progress || reload_completed)
4364 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4365
4366 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4367 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4368 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4369 new_rtx = gen_const_mem (Pmode, new_rtx);
4370 emit_move_insn (reg, new_rtx);
4371 new_rtx = reg;
4372 }
4373 else if (TARGET_CPU_ZARCH)
4374 {
4375 /* If the GOT offset might be >= 4k, we determine the position
4376 of the GOT entry via a PC-relative LARL (@GOTENT).
4377 larl temp, sym@GOTENT
4378 lg <target>, 0(temp) */
4379
4380 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4381
4382 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4383 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4384
4385 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4386 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4387 emit_move_insn (temp, new_rtx);
4388
4389 new_rtx = gen_const_mem (Pmode, temp);
4390 emit_move_insn (reg, new_rtx);
4391
4392 new_rtx = reg;
4393 }
4394 else
4395 {
4396 /* If the GOT offset might be >= 4k, we have to load it
4397 from the literal pool (@GOT).
4398
4399 lg temp, lit-litbase(r13)
4400 lg <target>, 0(temp)
4401 lit: .long sym@GOT */
4402
4403 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4404
4405 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4406 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4407
4408 if (reload_in_progress || reload_completed)
4409 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4410
4411 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4412 addr = gen_rtx_CONST (Pmode, addr);
4413 addr = force_const_mem (Pmode, addr);
4414 emit_move_insn (temp, addr);
4415
4416 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4417 new_rtx = gen_const_mem (Pmode, new_rtx);
4418 emit_move_insn (reg, new_rtx);
4419 new_rtx = reg;
4420 }
4421 }
4422 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
4423 {
4424 gcc_assert (XVECLEN (addr, 0) == 1);
4425 switch (XINT (addr, 1))
4426 {
4427 /* These unspecs address symbols (or PLT slots) relative to the GOT
4428 (not GOT slots!). In general this will exceed the
4429 displacement range, so these values belong in the literal
4430 pool. */
4431 case UNSPEC_GOTOFF:
4432 case UNSPEC_PLTOFF:
4433 new_rtx = force_const_mem (Pmode, orig);
4434 break;
4435
4436 /* For -fPIC the GOT size might exceed the displacement
4437 range so make sure the value is in the literal pool. */
4438 case UNSPEC_GOT:
4439 if (flag_pic == 2)
4440 new_rtx = force_const_mem (Pmode, orig);
4441 break;
4442
4443 /* For @GOTENT larl is used. This is handled like local
4444 symbol refs. */
4445 case UNSPEC_GOTENT:
4446 gcc_unreachable ();
4447 break;
4448
4449 /* @PLT is OK as is on 64-bit, must be converted to
4450 GOT-relative @PLTOFF on 31-bit. */
4451 case UNSPEC_PLT:
4452 if (!TARGET_CPU_ZARCH)
4453 {
4454 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4455
4456 if (reload_in_progress || reload_completed)
4457 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4458
4459 addr = XVECEXP (addr, 0, 0);
4460 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
4461 UNSPEC_PLTOFF);
4462 if (addend != const0_rtx)
4463 addr = gen_rtx_PLUS (Pmode, addr, addend);
4464 addr = gen_rtx_CONST (Pmode, addr);
4465 addr = force_const_mem (Pmode, addr);
4466 emit_move_insn (temp, addr);
4467
4468 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4469 if (reg != 0)
4470 {
4471 s390_load_address (reg, new_rtx);
4472 new_rtx = reg;
4473 }
4474 }
4475 else
4476 /* On 64 bit larl can be used. This case is handled like
4477 local symbol refs. */
4478 gcc_unreachable ();
4479 break;
4480
4481 /* Everything else cannot happen. */
4482 default:
4483 gcc_unreachable ();
4484 }
4485 }
4486 else if (addend != const0_rtx)
4487 {
4488 /* Otherwise, compute the sum. */
4489
4490 rtx base = legitimize_pic_address (addr, reg);
4491 new_rtx = legitimize_pic_address (addend,
4492 base == reg ? NULL_RTX : reg);
4493 if (GET_CODE (new_rtx) == CONST_INT)
4494 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
4495 else
4496 {
4497 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
4498 {
4499 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
4500 new_rtx = XEXP (new_rtx, 1);
4501 }
4502 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
4503 }
4504
4505 if (GET_CODE (new_rtx) == CONST)
4506 new_rtx = XEXP (new_rtx, 0);
4507 new_rtx = force_operand (new_rtx, 0);
4508 }
4509
4510 return new_rtx;
4511 }
4512
4513 /* Load the thread pointer into a register. */
4514
4515 rtx
4516 s390_get_thread_pointer (void)
4517 {
4518 rtx tp = gen_reg_rtx (Pmode);
4519
4520 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
4521 mark_reg_pointer (tp, BITS_PER_WORD);
4522
4523 return tp;
4524 }
4525
4526 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
4527 in s390_tls_symbol which always refers to __tls_get_offset.
4528 The returned offset is written to RESULT_REG and a USE rtx is
4529 generated for TLS_CALL. */
4530
4531 static GTY(()) rtx s390_tls_symbol;
4532
4533 static void
4534 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
4535 {
4536 rtx insn;
4537
4538 if (!flag_pic)
4539 emit_insn (s390_load_got ());
4540
4541 if (!s390_tls_symbol)
4542 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
4543
4544 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
4545 gen_rtx_REG (Pmode, RETURN_REGNUM));
4546
4547 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
4548 RTL_CONST_CALL_P (insn) = 1;
4549 }
4550
4551 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4552 this (thread-local) address. REG may be used as temporary. */
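/* Overview: global- and local-dynamic accesses call __tls_get_offset,
   initial-exec accesses load the thread-pointer offset from the GOT,
   and local-exec accesses take an @NTPOFF constant from the literal
   pool; in every case the thread pointer is added at the end.  */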
4553
4554 static rtx
4555 legitimize_tls_address (rtx addr, rtx reg)
4556 {
4557 rtx new_rtx, tls_call, temp, base, r2, insn;
4558
4559 if (GET_CODE (addr) == SYMBOL_REF)
4560 switch (tls_symbolic_operand (addr))
4561 {
4562 case TLS_MODEL_GLOBAL_DYNAMIC:
4563 start_sequence ();
4564 r2 = gen_rtx_REG (Pmode, 2);
4565 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
4566 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4567 new_rtx = force_const_mem (Pmode, new_rtx);
4568 emit_move_insn (r2, new_rtx);
4569 s390_emit_tls_call_insn (r2, tls_call);
4570 insn = get_insns ();
4571 end_sequence ();
4572
4573 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4574 temp = gen_reg_rtx (Pmode);
4575 emit_libcall_block (insn, temp, r2, new_rtx);
4576
4577 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4578 if (reg != 0)
4579 {
4580 s390_load_address (reg, new_rtx);
4581 new_rtx = reg;
4582 }
4583 break;
4584
4585 case TLS_MODEL_LOCAL_DYNAMIC:
4586 start_sequence ();
4587 r2 = gen_rtx_REG (Pmode, 2);
4588 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
4589 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4590 new_rtx = force_const_mem (Pmode, new_rtx);
4591 emit_move_insn (r2, new_rtx);
4592 s390_emit_tls_call_insn (r2, tls_call);
4593 insn = get_insns ();
4594 end_sequence ();
4595
4596 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
4597 temp = gen_reg_rtx (Pmode);
4598 emit_libcall_block (insn, temp, r2, new_rtx);
4599
4600 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4601 base = gen_reg_rtx (Pmode);
4602 s390_load_address (base, new_rtx);
4603
4604 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
4605 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4606 new_rtx = force_const_mem (Pmode, new_rtx);
4607 temp = gen_reg_rtx (Pmode);
4608 emit_move_insn (temp, new_rtx);
4609
4610 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
4611 if (reg != 0)
4612 {
4613 s390_load_address (reg, new_rtx);
4614 new_rtx = reg;
4615 }
4616 break;
4617
4618 case TLS_MODEL_INITIAL_EXEC:
4619 if (flag_pic == 1)
4620 {
4621 /* Assume GOT offset < 4k. This is handled the same way
4622 in both 31- and 64-bit code. */
4623
4624 if (reload_in_progress || reload_completed)
4625 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4626
4627 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4628 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4629 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4630 new_rtx = gen_const_mem (Pmode, new_rtx);
4631 temp = gen_reg_rtx (Pmode);
4632 emit_move_insn (temp, new_rtx);
4633 }
4634 else if (TARGET_CPU_ZARCH)
4635 {
4636 /* If the GOT offset might be >= 4k, we determine the position
4637 of the GOT entry via a PC-relative LARL. */
4638
4639 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4640 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4641 temp = gen_reg_rtx (Pmode);
4642 emit_move_insn (temp, new_rtx);
4643
4644 new_rtx = gen_const_mem (Pmode, temp);
4645 temp = gen_reg_rtx (Pmode);
4646 emit_move_insn (temp, new_rtx);
4647 }
4648 else if (flag_pic)
4649 {
4650 /* If the GOT offset might be >= 4k, we have to load it
4651 from the literal pool. */
4652
4653 if (reload_in_progress || reload_completed)
4654 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4655
4656 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4657 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4658 new_rtx = force_const_mem (Pmode, new_rtx);
4659 temp = gen_reg_rtx (Pmode);
4660 emit_move_insn (temp, new_rtx);
4661
4662 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4663 new_rtx = gen_const_mem (Pmode, new_rtx);
4664
4665 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4666 temp = gen_reg_rtx (Pmode);
4667 emit_insn (gen_rtx_SET (temp, new_rtx));
4668 }
4669 else
4670 {
4671 /* In position-dependent code, load the absolute address of
4672 the GOT entry from the literal pool. */
4673
4674 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4675 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4676 new_rtx = force_const_mem (Pmode, new_rtx);
4677 temp = gen_reg_rtx (Pmode);
4678 emit_move_insn (temp, new_rtx);
4679
4680 new_rtx = temp;
4681 new_rtx = gen_const_mem (Pmode, new_rtx);
4682 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4683 temp = gen_reg_rtx (Pmode);
4684 emit_insn (gen_rtx_SET (temp, new_rtx));
4685 }
4686
4687 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4688 if (reg != 0)
4689 {
4690 s390_load_address (reg, new_rtx);
4691 new_rtx = reg;
4692 }
4693 break;
4694
4695 case TLS_MODEL_LOCAL_EXEC:
4696 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4697 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4698 new_rtx = force_const_mem (Pmode, new_rtx);
4699 temp = gen_reg_rtx (Pmode);
4700 emit_move_insn (temp, new_rtx);
4701
4702 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4703 if (reg != 0)
4704 {
4705 s390_load_address (reg, new_rtx);
4706 new_rtx = reg;
4707 }
4708 break;
4709
4710 default:
4711 gcc_unreachable ();
4712 }
4713
4714 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
4715 {
4716 switch (XINT (XEXP (addr, 0), 1))
4717 {
4718 case UNSPEC_INDNTPOFF:
4719 gcc_assert (TARGET_CPU_ZARCH);
4720 new_rtx = addr;
4721 break;
4722
4723 default:
4724 gcc_unreachable ();
4725 }
4726 }
4727
4728 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
4729 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4730 {
4731 new_rtx = XEXP (XEXP (addr, 0), 0);
4732 if (GET_CODE (new_rtx) != SYMBOL_REF)
4733 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4734
4735 new_rtx = legitimize_tls_address (new_rtx, reg);
4736 new_rtx = plus_constant (Pmode, new_rtx,
4737 INTVAL (XEXP (XEXP (addr, 0), 1)));
4738 new_rtx = force_operand (new_rtx, 0);
4739 }
4740
4741 else
4742 gcc_unreachable (); /* for now ... */
4743
4744 return new_rtx;
4745 }
4746
4747 /* Emit insns making the address in operands[1] valid for a standard
4748 move to operands[0]. operands[1] is replaced by an address which
4749 should be used instead of the former RTX to emit the move
4750 pattern. */
4751
4752 void
4753 emit_symbolic_move (rtx *operands)
4754 {
4755 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
4756
4757 if (GET_CODE (operands[0]) == MEM)
4758 operands[1] = force_reg (Pmode, operands[1]);
4759 else if (TLS_SYMBOLIC_CONST (operands[1]))
4760 operands[1] = legitimize_tls_address (operands[1], temp);
4761 else if (flag_pic)
4762 operands[1] = legitimize_pic_address (operands[1], temp);
4763 }
4764
4765 /* Try machine-dependent ways of modifying an illegitimate address X
4766 to be legitimate. If we find one, return the new, valid address.
4767
4768 OLDX is the address as it was before break_out_memory_refs was called.
4769 In some cases it is useful to look at this to decide what needs to be done.
4770
4771 MODE is the mode of the operand pointed to by X.
4772
4773 When -fpic is used, special handling is needed for symbolic references.
4774 See comments by legitimize_pic_address for details. */
4775
4776 static rtx
4777 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4778 machine_mode mode ATTRIBUTE_UNUSED)
4779 {
4780 rtx constant_term = const0_rtx;
4781
4782 if (TLS_SYMBOLIC_CONST (x))
4783 {
4784 x = legitimize_tls_address (x, 0);
4785
4786 if (s390_legitimate_address_p (mode, x, FALSE))
4787 return x;
4788 }
4789 else if (GET_CODE (x) == PLUS
4790 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
4791 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
4792 {
4793 return x;
4794 }
4795 else if (flag_pic)
4796 {
4797 if (SYMBOLIC_CONST (x)
4798 || (GET_CODE (x) == PLUS
4799 && (SYMBOLIC_CONST (XEXP (x, 0))
4800 || SYMBOLIC_CONST (XEXP (x, 1)))))
4801 x = legitimize_pic_address (x, 0);
4802
4803 if (s390_legitimate_address_p (mode, x, FALSE))
4804 return x;
4805 }
4806
4807 x = eliminate_constant_term (x, &constant_term);
4808
4809 /* Optimize loading of large displacements by splitting them
4810 into the multiple of 4K and the rest; this allows the
4811 former to be CSE'd if possible.
4812
4813 Don't do this if the displacement is added to a register
4814 pointing into the stack frame, as the offsets will
4815 change later anyway. */
4816
4817 if (GET_CODE (constant_term) == CONST_INT
4818 && !TARGET_LONG_DISPLACEMENT
4819 && !DISP_IN_RANGE (INTVAL (constant_term))
4820 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
4821 {
4822 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
4823 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
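/* E.g. a displacement of 0x12345 is split into upper = 0x12000, which
   is loaded into a register (and can be CSE'd), and lower = 0x345,
   which fits the 12-bit displacement field.  */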
4824
4825 rtx temp = gen_reg_rtx (Pmode);
4826 rtx val = force_operand (GEN_INT (upper), temp);
4827 if (val != temp)
4828 emit_move_insn (temp, val);
4829
4830 x = gen_rtx_PLUS (Pmode, x, temp);
4831 constant_term = GEN_INT (lower);
4832 }
4833
4834 if (GET_CODE (x) == PLUS)
4835 {
4836 if (GET_CODE (XEXP (x, 0)) == REG)
4837 {
4838 rtx temp = gen_reg_rtx (Pmode);
4839 rtx val = force_operand (XEXP (x, 1), temp);
4840 if (val != temp)
4841 emit_move_insn (temp, val);
4842
4843 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
4844 }
4845
4846 else if (GET_CODE (XEXP (x, 1)) == REG)
4847 {
4848 rtx temp = gen_reg_rtx (Pmode);
4849 rtx val = force_operand (XEXP (x, 0), temp);
4850 if (val != temp)
4851 emit_move_insn (temp, val);
4852
4853 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
4854 }
4855 }
4856
4857 if (constant_term != const0_rtx)
4858 x = gen_rtx_PLUS (Pmode, x, constant_term);
4859
4860 return x;
4861 }
4862
4863 /* Try a machine-dependent way of reloading an illegitimate address AD
4864 operand. If we find one, push the reload and return the new address.
4865
4866 MODE is the mode of the enclosing MEM. OPNUM is the operand number
4867 and TYPE is the reload type of the current reload. */
4868
4869 rtx
4870 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
4871 int opnum, int type)
4872 {
4873 if (!optimize || TARGET_LONG_DISPLACEMENT)
4874 return NULL_RTX;
4875
4876 if (GET_CODE (ad) == PLUS)
4877 {
4878 rtx tem = simplify_binary_operation (PLUS, Pmode,
4879 XEXP (ad, 0), XEXP (ad, 1));
4880 if (tem)
4881 ad = tem;
4882 }
4883
4884 if (GET_CODE (ad) == PLUS
4885 && GET_CODE (XEXP (ad, 0)) == REG
4886 && GET_CODE (XEXP (ad, 1)) == CONST_INT
4887 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
4888 {
4889 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
4890 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
4891 rtx cst, tem, new_rtx;
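/* As in s390_legitimize_address the offset is split into a 4K-aligned
   upper part and a 12-bit lower part; the upper part is reloaded into
   a base register, going through the literal pool if it is not a
   legitimate reload constant.  */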
4892
4893 cst = GEN_INT (upper);
4894 if (!legitimate_reload_constant_p (cst))
4895 cst = force_const_mem (Pmode, cst);
4896
4897 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
4898 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
4899
4900 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
4901 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
4902 opnum, (enum reload_type) type);
4903 return new_rtx;
4904 }
4905
4906 return NULL_RTX;
4907 }
4908
4909 /* Emit code to move LEN bytes from SRC to DST. */
4910
4911 bool
4912 s390_expand_movmem (rtx dst, rtx src, rtx len)
4913 {
4914 /* When tuning for z10 or higher we rely on the Glibc functions to
4915 do the right thing. Only for constant lengths below 64k do we
4916 generate inline code. */
4917 if (s390_tune >= PROCESSOR_2097_Z10
4918 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
4919 return false;
4920
4921 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
4922 {
4923 if (INTVAL (len) > 0)
4924 emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
4925 }
4926
4927 else if (TARGET_MVCLE)
4928 {
4929 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
4930 }
4931
4932 else
4933 {
4934 rtx dst_addr, src_addr, count, blocks, temp;
4935 rtx_code_label *loop_start_label = gen_label_rtx ();
4936 rtx_code_label *loop_end_label = gen_label_rtx ();
4937 rtx_code_label *end_label = gen_label_rtx ();
4938 machine_mode mode;
4939
4940 mode = GET_MODE (len);
4941 if (mode == VOIDmode)
4942 mode = Pmode;
4943
4944 dst_addr = gen_reg_rtx (Pmode);
4945 src_addr = gen_reg_rtx (Pmode);
4946 count = gen_reg_rtx (mode);
4947 blocks = gen_reg_rtx (mode);
4948
4949 convert_move (count, len, 1);
4950 emit_cmp_and_jump_insns (count, const0_rtx,
4951 EQ, NULL_RTX, mode, 1, end_label);
4952
4953 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
4954 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
4955 dst = change_address (dst, VOIDmode, dst_addr);
4956 src = change_address (src, VOIDmode, src_addr);
4957
4958 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4959 OPTAB_DIRECT);
4960 if (temp != count)
4961 emit_move_insn (count, temp);
4962
4963 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4964 OPTAB_DIRECT);
4965 if (temp != blocks)
4966 emit_move_insn (blocks, temp);
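/* COUNT now holds LEN - 1; BLOCKS is the number of full 256-byte MVCs
   issued by the loop below, and the movmem_short emitted after the
   loop copies the remaining 1..256 bytes.  */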
4967
4968 emit_cmp_and_jump_insns (blocks, const0_rtx,
4969 EQ, NULL_RTX, mode, 1, loop_end_label);
4970
4971 emit_label (loop_start_label);
4972
4973 if (TARGET_Z10
4974 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
4975 {
4976 rtx prefetch;
4977
4978 /* Issue a read prefetch for the +3 cache line. */
4979 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
4980 const0_rtx, const0_rtx);
4981 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4982 emit_insn (prefetch);
4983
4984 /* Issue a write prefetch for the +3 cache line. */
4985 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
4986 const1_rtx, const0_rtx);
4987 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4988 emit_insn (prefetch);
4989 }
4990
4991 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
4992 s390_load_address (dst_addr,
4993 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
4994 s390_load_address (src_addr,
4995 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
4996
4997 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4998 OPTAB_DIRECT);
4999 if (temp != blocks)
5000 emit_move_insn (blocks, temp);
5001
5002 emit_cmp_and_jump_insns (blocks, const0_rtx,
5003 EQ, NULL_RTX, mode, 1, loop_end_label);
5004
5005 emit_jump (loop_start_label);
5006 emit_label (loop_end_label);
5007
5008 emit_insn (gen_movmem_short (dst, src,
5009 convert_to_mode (Pmode, count, 1)));
5010 emit_label (end_label);
5011 }
5012 return true;
5013 }
5014
5015 /* Emit code to set LEN bytes at DST to VAL.
5016 Make use of clrmem if VAL is zero. */
5017
5018 void
5019 s390_expand_setmem (rtx dst, rtx len, rtx val)
5020 {
5021 if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0)
5022 return;
5023
5024 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5025
5026 if (GET_CODE (len) == CONST_INT && INTVAL (len) > 0 && INTVAL (len) <= 257)
5027 {
5028 if (val == const0_rtx && INTVAL (len) <= 256)
5029 emit_insn (gen_clrmem_short (dst, GEN_INT (INTVAL (len) - 1)));
5030 else
5031 {
5032 /* Initialize memory by storing the first byte. */
5033 emit_move_insn (adjust_address (dst, QImode, 0), val);
5034
5035 if (INTVAL (len) > 1)
5036 {
5037 /* Initiate a 1-byte overlap move.
5038 The first byte of DST is propagated through DSTP1.
5039 Prepare a movmem for: DST+1 = DST (length = LEN - 1).
5040 DST is set to size 1 so the rest of the memory location
5041 does not count as a source operand. */
5042 rtx dstp1 = adjust_address (dst, VOIDmode, 1);
5043 set_mem_size (dst, 1);
5044
5045 emit_insn (gen_movmem_short (dstp1, dst,
5046 GEN_INT (INTVAL (len) - 2)));
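/* MVC copies left to right one byte at a time, so this overlapping
   move replicates the byte just stored at DST across the following
   LEN - 1 bytes.  */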
5047 }
5048 }
5049 }
5050
5051 else if (TARGET_MVCLE)
5052 {
5053 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5054 emit_insn (gen_setmem_long (dst, convert_to_mode (Pmode, len, 1), val));
5055 }
5056
5057 else
5058 {
5059 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5060 rtx_code_label *loop_start_label = gen_label_rtx ();
5061 rtx_code_label *loop_end_label = gen_label_rtx ();
5062 rtx_code_label *end_label = gen_label_rtx ();
5063 machine_mode mode;
5064
5065 mode = GET_MODE (len);
5066 if (mode == VOIDmode)
5067 mode = Pmode;
5068
5069 dst_addr = gen_reg_rtx (Pmode);
5070 count = gen_reg_rtx (mode);
5071 blocks = gen_reg_rtx (mode);
5072
5073 convert_move (count, len, 1);
5074 emit_cmp_and_jump_insns (count, const0_rtx,
5075 EQ, NULL_RTX, mode, 1, end_label);
5076
5077 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5078 dst = change_address (dst, VOIDmode, dst_addr);
5079
5080 if (val == const0_rtx)
5081 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5082 OPTAB_DIRECT);
5083 else
5084 {
5085 dstp1 = adjust_address (dst, VOIDmode, 1);
5086 set_mem_size (dst, 1);
5087
5088 /* Initialize memory by storing the first byte. */
5089 emit_move_insn (adjust_address (dst, QImode, 0), val);
5090
5091 /* If count is 1 we are done. */
5092 emit_cmp_and_jump_insns (count, const1_rtx,
5093 EQ, NULL_RTX, mode, 1, end_label);
5094
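/* Subtract 2 rather than 1: one byte has already been stored at DST,
   and as in the VAL == 0 case the remaining length is kept as
   length - 1.  */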
5095 temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
5096 OPTAB_DIRECT);
5097 }
5098 if (temp != count)
5099 emit_move_insn (count, temp);
5100
5101 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5102 OPTAB_DIRECT);
5103 if (temp != blocks)
5104 emit_move_insn (blocks, temp);
5105
5106 emit_cmp_and_jump_insns (blocks, const0_rtx,
5107 EQ, NULL_RTX, mode, 1, loop_end_label);
5108
5109 emit_label (loop_start_label);
5110
5111 if (TARGET_Z10
5112 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
5113 {
5114 /* Issue a write prefetch for the +4 cache line. */
5115 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
5116 GEN_INT (1024)),
5117 const1_rtx, const0_rtx);
5118 emit_insn (prefetch);
5119 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5120 }
5121
5122 if (val == const0_rtx)
5123 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5124 else
5125 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255)));
5126 s390_load_address (dst_addr,
5127 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5128
5129 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5130 OPTAB_DIRECT);
5131 if (temp != blocks)
5132 emit_move_insn (blocks, temp);
5133
5134 emit_cmp_and_jump_insns (blocks, const0_rtx,
5135 EQ, NULL_RTX, mode, 1, loop_end_label);
5136
5137 emit_jump (loop_start_label);
5138 emit_label (loop_end_label);
5139
5140 if (val == const0_rtx)
5141 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5142 else
5143 emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1)));
5144 emit_label (end_label);
5145 }
5146 }
5147
5148 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5149 and return the result in TARGET. */
5150
5151 bool
5152 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5153 {
5154 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5155 rtx tmp;
5156
5157 /* When tuning for z10 or higher we rely on the Glibc functions to
5158 do the right thing. Only for constant lengths below 64k do we
5159 generate inline code. */
5160 if (s390_tune >= PROCESSOR_2097_Z10
5161 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5162 return false;
5163
5164 /* As the result of CMPINT is inverted compared to what we need,
5165 we have to swap the operands. */
5166 tmp = op0; op0 = op1; op1 = tmp;
5167
5168 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5169 {
5170 if (INTVAL (len) > 0)
5171 {
5172 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5173 emit_insn (gen_cmpint (target, ccreg));
5174 }
5175 else
5176 emit_move_insn (target, const0_rtx);
5177 }
5178 else if (TARGET_MVCLE)
5179 {
5180 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5181 emit_insn (gen_cmpint (target, ccreg));
5182 }
5183 else
5184 {
5185 rtx addr0, addr1, count, blocks, temp;
5186 rtx_code_label *loop_start_label = gen_label_rtx ();
5187 rtx_code_label *loop_end_label = gen_label_rtx ();
5188 rtx_code_label *end_label = gen_label_rtx ();
5189 machine_mode mode;
5190
5191 mode = GET_MODE (len);
5192 if (mode == VOIDmode)
5193 mode = Pmode;
5194
5195 addr0 = gen_reg_rtx (Pmode);
5196 addr1 = gen_reg_rtx (Pmode);
5197 count = gen_reg_rtx (mode);
5198 blocks = gen_reg_rtx (mode);
5199
5200 convert_move (count, len, 1);
5201 emit_cmp_and_jump_insns (count, const0_rtx,
5202 EQ, NULL_RTX, mode, 1, end_label);
5203
5204 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5205 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5206 op0 = change_address (op0, VOIDmode, addr0);
5207 op1 = change_address (op1, VOIDmode, addr1);
5208
5209 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5210 OPTAB_DIRECT);
5211 if (temp != count)
5212 emit_move_insn (count, temp);
5213
5214 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5215 OPTAB_DIRECT);
5216 if (temp != blocks)
5217 emit_move_insn (blocks, temp);
5218
5219 emit_cmp_and_jump_insns (blocks, const0_rtx,
5220 EQ, NULL_RTX, mode, 1, loop_end_label);
5221
5222 emit_label (loop_start_label);
5223
5224 if (TARGET_Z10
5225 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5226 {
5227 rtx prefetch;
5228
5229 /* Issue a read prefetch for the +2 cache line of operand 1. */
5230 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5231 const0_rtx, const0_rtx);
5232 emit_insn (prefetch);
5233 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5234
5235 /* Issue a read prefetch for the +2 cache line of operand 2. */
5236 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5237 const0_rtx, const0_rtx);
5238 emit_insn (prefetch);
5239 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5240 }
5241
5242 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
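/* If this 256-byte chunk already differs (CC != 0), skip the rest of
   the loop and convert the condition code at END_LABEL right away.  */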
5243 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5244 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5245 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5246 temp = gen_rtx_SET (pc_rtx, temp);
5247 emit_jump_insn (temp);
5248
5249 s390_load_address (addr0,
5250 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5251 s390_load_address (addr1,
5252 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5253
5254 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5255 OPTAB_DIRECT);
5256 if (temp != blocks)
5257 emit_move_insn (blocks, temp);
5258
5259 emit_cmp_and_jump_insns (blocks, const0_rtx,
5260 EQ, NULL_RTX, mode, 1, loop_end_label);
5261
5262 emit_jump (loop_start_label);
5263 emit_label (loop_end_label);
5264
5265 emit_insn (gen_cmpmem_short (op0, op1,
5266 convert_to_mode (Pmode, count, 1)));
5267 emit_label (end_label);
5268
5269 emit_insn (gen_cmpint (target, ccreg));
5270 }
5271 return true;
5272 }
5273
5274 /* Emit a conditional jump to LABEL for condition code mask MASK using
5275 comparison operator COMPARISON. Return the emitted jump insn. */
5276
5277 static rtx
5278 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5279 {
5280 rtx temp;
5281
5282 gcc_assert (comparison == EQ || comparison == NE);
5283 gcc_assert (mask > 0 && mask < 15);
5284
5285 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5286 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5287 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5288 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5289 temp = gen_rtx_SET (pc_rtx, temp);
5290 return emit_jump_insn (temp);
5291 }
5292
5293 /* Emit the instructions to implement strlen of STRING and store the
5294 result in TARGET. The string has the known ALIGNMENT. This
5295 version uses vector instructions and is therefore not appropriate
5296 for targets prior to z13. */
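/* Outline of the expansion below: if the string is not known to be
   16-byte aligned, the bytes up to the next boundary are loaded with
   VECTOR LOAD WITH LENGTH (vll), which zero-fills the rest of the
   vector register; the loop then scans one aligned 16-byte chunk per
   iteration with VECTOR FIND ELEMENT EQUAL (vfene) until the
   terminating zero byte is found.  */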
5297
5298 void
5299 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5300 {
5301 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
5302 int very_likely = REG_BR_PROB_BASE - 1;
5303 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5304 rtx str_reg = gen_reg_rtx (V16QImode);
5305 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5306 rtx str_idx_reg = gen_reg_rtx (Pmode);
5307 rtx result_reg = gen_reg_rtx (V16QImode);
5308 rtx is_aligned_label = gen_label_rtx ();
5309 rtx into_loop_label = NULL_RTX;
5310 rtx loop_start_label = gen_label_rtx ();
5311 rtx temp;
5312 rtx len = gen_reg_rtx (QImode);
5313 rtx cond;
5314
5315 s390_load_address (str_addr_base_reg, XEXP (string, 0));
5316 emit_move_insn (str_idx_reg, const0_rtx);
5317
5318 if (INTVAL (alignment) < 16)
5319 {
5320 /* Check whether the address happens to be aligned properly so
5321 jump directly to the aligned loop. */
5322 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5323 str_addr_base_reg, GEN_INT (15)),
5324 const0_rtx, EQ, NULL_RTX,
5325 Pmode, 1, is_aligned_label);
5326
5327 temp = gen_reg_rtx (Pmode);
5328 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5329 GEN_INT (15), temp, 1, OPTAB_DIRECT);
5330 gcc_assert (REG_P (temp));
5331 highest_index_to_load_reg =
5332 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5333 highest_index_to_load_reg, 1, OPTAB_DIRECT);
5334 gcc_assert (REG_P (highest_index_to_load_reg));
5335 emit_insn (gen_vllv16qi (str_reg,
5336 convert_to_mode (SImode, highest_index_to_load_reg, 1),
5337 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5338
5339 into_loop_label = gen_label_rtx ();
5340 s390_emit_jump (into_loop_label, NULL_RTX);
5341 emit_barrier ();
5342 }
5343
5344 emit_label (is_aligned_label);
5345 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5346
5347 /* From this point on we only perform 16-byte aligned
5348 loads. */
5349 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5350
5351 emit_label (loop_start_label);
5352 LABEL_NUSES (loop_start_label) = 1;
5353
5354 /* Load 16 bytes of the string into VR. */
5355 emit_move_insn (str_reg,
5356 gen_rtx_MEM (V16QImode,
5357 gen_rtx_PLUS (Pmode, str_idx_reg,
5358 str_addr_base_reg)));
5359 if (into_loop_label != NULL_RTX)
5360 {
5361 emit_label (into_loop_label);
5362 LABEL_NUSES (into_loop_label) = 1;
5363 }
5364
5365 /* Increment string index by 16 bytes. */
5366 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5367 str_idx_reg, 1, OPTAB_DIRECT);
5368
5369 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5370 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5371
5372 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5373 REG_BR_PROB, very_likely);
5374 emit_insn (gen_vec_extractv16qi (len, result_reg, GEN_INT (7)));
5375
5376 /* If the string pointer wasn't aligned we have loaded less than 16
5377 bytes and the remaining bytes got filled with zeros (by vll).
5378 Now we have to check whether the resulting index lies within the
5379 bytes that are actually part of the string. */
5380
5381 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5382 highest_index_to_load_reg);
5383 s390_load_address (highest_index_to_load_reg,
5384 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
5385 const1_rtx));
5386 if (TARGET_64BIT)
5387 emit_insn (gen_movdicc (str_idx_reg, cond,
5388 highest_index_to_load_reg, str_idx_reg));
5389 else
5390 emit_insn (gen_movsicc (str_idx_reg, cond,
5391 highest_index_to_load_reg, str_idx_reg));
5392
5393 add_int_reg_note (s390_emit_jump (is_aligned_label, cond), REG_BR_PROB,
5394 very_unlikely);
5395
5396 expand_binop (Pmode, add_optab, str_idx_reg,
5397 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
5398 /* FIXME: len is already zero extended - so avoid the llgcr emitted
5399 here. */
5400 temp = expand_binop (Pmode, add_optab, str_idx_reg,
5401 convert_to_mode (Pmode, len, 1),
5402 target, 1, OPTAB_DIRECT);
5403 if (temp != target)
5404 emit_move_insn (target, temp);
5405 }
5406
5407 /* Expand conditional increment or decrement using alc/slb instructions.
5408 Should generate code setting DST to either SRC or SRC + INCREMENT,
5409 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
5410 Returns true if successful, false otherwise.
5411
5412 That makes it possible to implement some if-constructs without jumps e.g.:
5413 (borrow = CC0 | CC1 and carry = CC2 | CC3)
5414 unsigned int a, b, c;
5415 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
5416 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
5417 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
5418 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
5419
5420 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
5421 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
5422 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
5423 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
5424 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
5425
5426 bool
5427 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
5428 rtx dst, rtx src, rtx increment)
5429 {
5430 machine_mode cmp_mode;
5431 machine_mode cc_mode;
5432 rtx op_res;
5433 rtx insn;
5434 rtvec p;
5435 int ret;
5436
5437 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
5438 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
5439 cmp_mode = SImode;
5440 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
5441 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
5442 cmp_mode = DImode;
5443 else
5444 return false;
5445
5446 /* Try ADD LOGICAL WITH CARRY. */
5447 if (increment == const1_rtx)
5448 {
5449 /* Determine CC mode to use. */
5450 if (cmp_code == EQ || cmp_code == NE)
5451 {
5452 if (cmp_op1 != const0_rtx)
5453 {
5454 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5455 NULL_RTX, 0, OPTAB_WIDEN);
5456 cmp_op1 = const0_rtx;
5457 }
5458
5459 cmp_code = cmp_code == EQ ? LEU : GTU;
5460 }
5461
5462 if (cmp_code == LTU || cmp_code == LEU)
5463 {
5464 rtx tem = cmp_op0;
5465 cmp_op0 = cmp_op1;
5466 cmp_op1 = tem;
5467 cmp_code = swap_condition (cmp_code);
5468 }
5469
5470 switch (cmp_code)
5471 {
5472 case GTU:
5473 cc_mode = CCUmode;
5474 break;
5475
5476 case GEU:
5477 cc_mode = CCL3mode;
5478 break;
5479
5480 default:
5481 return false;
5482 }
5483
5484 /* Emit comparison instruction pattern. */
5485 if (!register_operand (cmp_op0, cmp_mode))
5486 cmp_op0 = force_reg (cmp_mode, cmp_op0);
5487
5488 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
5489 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5490 /* We use insn_invalid_p here to add clobbers if required. */
5491 ret = insn_invalid_p (emit_insn (insn), false);
5492 gcc_assert (!ret);
5493
5494 /* Emit ALC instruction pattern. */
5495 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
5496 gen_rtx_REG (cc_mode, CC_REGNUM),
5497 const0_rtx);
5498
5499 if (src != const0_rtx)
5500 {
5501 if (!register_operand (src, GET_MODE (dst)))
5502 src = force_reg (GET_MODE (dst), src);
5503
5504 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
5505 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
5506 }
5507
5508 p = rtvec_alloc (2);
5509 RTVEC_ELT (p, 0) =
5510 gen_rtx_SET (dst, op_res);
5511 RTVEC_ELT (p, 1) =
5512 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5513 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
5514
5515 return true;
5516 }
5517
5518 /* Try SUBTRACT LOGICAL WITH BORROW. */
5519 if (increment == constm1_rtx)
5520 {
5521 /* Determine CC mode to use. */
5522 if (cmp_code == EQ || cmp_code == NE)
5523 {
5524 if (cmp_op1 != const0_rtx)
5525 {
5526 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5527 NULL_RTX, 0, OPTAB_WIDEN);
5528 cmp_op1 = const0_rtx;
5529 }
5530
5531 cmp_code = cmp_code == EQ ? LEU : GTU;
5532 }
5533
5534 if (cmp_code == GTU || cmp_code == GEU)
5535 {
5536 rtx tem = cmp_op0;
5537 cmp_op0 = cmp_op1;
5538 cmp_op1 = tem;
5539 cmp_code = swap_condition (cmp_code);
5540 }
5541
5542 switch (cmp_code)
5543 {
5544 case LEU:
5545 cc_mode = CCUmode;
5546 break;
5547
5548 case LTU:
5549 cc_mode = CCL3mode;
5550 break;
5551
5552 default:
5553 return false;
5554 }
5555
5556 /* Emit comparison instruction pattern. */
5557 if (!register_operand (cmp_op0, cmp_mode))
5558 cmp_op0 = force_reg (cmp_mode, cmp_op0);
5559
5560 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
5561 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5562 /* We use insn_invalid_p here to add clobbers if required. */
5563 ret = insn_invalid_p (emit_insn (insn), false);
5564 gcc_assert (!ret);
5565
5566 /* Emit SLB instruction pattern. */
5567 if (!register_operand (src, GET_MODE (dst)))
5568 src = force_reg (GET_MODE (dst), src);
5569
5570 op_res = gen_rtx_MINUS (GET_MODE (dst),
5571 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
5572 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
5573 gen_rtx_REG (cc_mode, CC_REGNUM),
5574 const0_rtx));
5575 p = rtvec_alloc (2);
5576 RTVEC_ELT (p, 0) =
5577 gen_rtx_SET (dst, op_res);
5578 RTVEC_ELT (p, 1) =
5579 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5580 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
5581
5582 return true;
5583 }
5584
5585 return false;
5586 }
5587
5588 /* Expand code for the insv template. Return true if successful. */
5589
5590 bool
5591 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
5592 {
5593 int bitsize = INTVAL (op1);
5594 int bitpos = INTVAL (op2);
5595 machine_mode mode = GET_MODE (dest);
5596 machine_mode smode;
5597 int smode_bsize, mode_bsize;
5598 rtx op, clobber;
5599
5600 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
5601 return false;
5602
5603 /* Generate INSERT IMMEDIATE (IILL et al). */
5604 /* (set (ze (reg)) (const_int)). */
5605 if (TARGET_ZARCH
5606 && register_operand (dest, word_mode)
5607 && (bitpos % 16) == 0
5608 && (bitsize % 16) == 0
5609 && const_int_operand (src, VOIDmode))
5610 {
5611 HOST_WIDE_INT val = INTVAL (src);
5612 int regpos = bitpos + bitsize;
5613
5614 while (regpos > bitpos)
5615 {
5616 machine_mode putmode;
5617 int putsize;
5618
5619 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
5620 putmode = SImode;
5621 else
5622 putmode = HImode;
5623
5624 putsize = GET_MODE_BITSIZE (putmode);
5625 regpos -= putsize;
5626 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
5627 GEN_INT (putsize),
5628 GEN_INT (regpos)),
5629 gen_int_mode (val, putmode));
5630 val >>= putsize;
5631 }
5632 gcc_assert (regpos == bitpos);
5633 return true;
5634 }
5635
5636 smode = smallest_mode_for_size (bitsize, MODE_INT);
5637 smode_bsize = GET_MODE_BITSIZE (smode);
5638 mode_bsize = GET_MODE_BITSIZE (mode);
5639
5640 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
5641 if (bitpos == 0
5642 && (bitsize % BITS_PER_UNIT) == 0
5643 && MEM_P (dest)
5644 && (register_operand (src, word_mode)
5645 || const_int_operand (src, VOIDmode)))
5646 {
5647 /* Emit standard pattern if possible. */
5648 if (smode_bsize == bitsize)
5649 {
5650 emit_move_insn (adjust_address (dest, smode, 0),
5651 gen_lowpart (smode, src));
5652 return true;
5653 }
5654
5655 /* (set (ze (mem)) (const_int)). */
5656 else if (const_int_operand (src, VOIDmode))
5657 {
5658 int size = bitsize / BITS_PER_UNIT;
5659 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
5660 BLKmode,
5661 UNITS_PER_WORD - size);
5662
5663 dest = adjust_address (dest, BLKmode, 0);
5664 set_mem_size (dest, size);
5665 s390_expand_movmem (dest, src_mem, GEN_INT (size));
5666 return true;
5667 }
5668
5669 /* (set (ze (mem)) (reg)). */
5670 else if (register_operand (src, word_mode))
5671 {
5672 if (bitsize <= 32)
5673 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
5674 const0_rtx), src);
5675 else
5676 {
5677 /* Emit st,stcmh sequence. */
5678 int stcmh_width = bitsize - 32;
5679 int size = stcmh_width / BITS_PER_UNIT;
5680
5681 emit_move_insn (adjust_address (dest, SImode, size),
5682 gen_lowpart (SImode, src));
5683 set_mem_size (dest, size);
5684 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
5685 GEN_INT (stcmh_width),
5686 const0_rtx),
5687 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
5688 }
5689 return true;
5690 }
5691 }
5692
5693 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
5694 if ((bitpos % BITS_PER_UNIT) == 0
5695 && (bitsize % BITS_PER_UNIT) == 0
5696 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
5697 && MEM_P (src)
5698 && (mode == DImode || mode == SImode)
5699 && register_operand (dest, mode))
5700 {
5701 /* Emit a strict_low_part pattern if possible. */
5702 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
5703 {
5704 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
5705 op = gen_rtx_SET (op, gen_lowpart (smode, src));
5706 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5707 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
5708 return true;
5709 }
5710
5711 /* ??? There are more powerful versions of ICM that are not
5712 completely represented in the md file. */
5713 }
5714
5715 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
5716 if (TARGET_Z10 && (mode == DImode || mode == SImode))
5717 {
5718 machine_mode mode_s = GET_MODE (src);
5719
5720 if (mode_s == VOIDmode)
5721 {
5722 /* Assume const_int etc already in the proper mode. */
5723 src = force_reg (mode, src);
5724 }
5725 else if (mode_s != mode)
5726 {
5727 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
5728 src = force_reg (mode_s, src);
5729 src = gen_lowpart (mode, src);
5730 }
5731
5732 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2);
5733 op = gen_rtx_SET (op, src);
5734
5735 if (!TARGET_ZEC12)
5736 {
5737 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5738 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
5739 }
5740 emit_insn (op);
5741
5742 return true;
5743 }
5744
5745 return false;
5746 }
5747
5748 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
5749 register that holds VAL of mode MODE shifted by COUNT bits. */
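/* I.e. the returned SImode value is (VAL & GET_MODE_MASK (MODE)) << COUNT. */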
5750
5751 static inline rtx
5752 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
5753 {
5754 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
5755 NULL_RTX, 1, OPTAB_DIRECT);
5756 return expand_simple_binop (SImode, ASHIFT, val, count,
5757 NULL_RTX, 1, OPTAB_DIRECT);
5758 }
5759
5760 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
5761 the result in TARGET. */
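/* Only a subset of the relations is available as a direct vector compare;
   the remaining ones are derived below by swapping the operands
   (e.g. LT -> GT) and/or negating the result (e.g. NE -> NOT (EQ)).  */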
5762
5763 void
5764 s390_expand_vec_compare (rtx target, enum rtx_code cond,
5765 rtx cmp_op1, rtx cmp_op2)
5766 {
5767 machine_mode mode = GET_MODE (target);
5768 bool neg_p = false, swap_p = false;
5769 rtx tmp;
5770
5771 if (GET_MODE (cmp_op1) == V2DFmode)
5772 {
5773 switch (cond)
5774 {
5775 /* NE a != b -> !(a == b) */
5776 case NE: cond = EQ; neg_p = true; break;
5777 /* UNGT a u> b -> !(b >= a) */
5778 case UNGT: cond = GE; neg_p = true; swap_p = true; break;
5779 /* UNGE a u>= b -> !(b > a) */
5780 case UNGE: cond = GT; neg_p = true; swap_p = true; break;
5781 /* LE: a <= b -> b >= a */
5782 case LE: cond = GE; swap_p = true; break;
5783 /* UNLE: a u<= b -> !(a > b) */
5784 case UNLE: cond = GT; neg_p = true; break;
5785 /* LT: a < b -> b > a */
5786 case LT: cond = GT; swap_p = true; break;
5787 /* UNLT: a u< b -> !(a >= b) */
5788 case UNLT: cond = GE; neg_p = true; break;
5789 case UNEQ:
5790 emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2));
5791 return;
5792 case LTGT:
5793 emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2));
5794 return;
5795 case ORDERED:
5796 emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2));
5797 return;
5798 case UNORDERED:
5799 emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2));
5800 return;
5801 default: break;
5802 }
5803 }
5804 else
5805 {
5806 switch (cond)
5807 {
5808 /* NE: a != b -> !(a == b) */
5809 case NE: cond = EQ; neg_p = true; break;
5810 /* GE: a >= b -> !(b > a) */
5811 case GE: cond = GT; neg_p = true; swap_p = true; break;
5812 /* GEU: a >= b -> !(b > a) */
5813 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
5814 /* LE: a <= b -> !(a > b) */
5815 case LE: cond = GT; neg_p = true; break;
5816 /* LEU: a <= b -> !(a > b) */
5817 case LEU: cond = GTU; neg_p = true; break;
5818 /* LT: a < b -> b > a */
5819 case LT: cond = GT; swap_p = true; break;
5820 /* LTU: a < b -> b > a */
5821 case LTU: cond = GTU; swap_p = true; break;
5822 default: break;
5823 }
5824 }
5825
5826 if (swap_p)
5827 {
5828 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
5829 }
5830
5831 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
5832 mode,
5833 cmp_op1, cmp_op2)));
5834 if (neg_p)
5835 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
5836 }
5837
5838 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
5839 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
5840 elements in CMP1 and CMP2 fulfill the comparison. */
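/* The vector compare sets the condition code: with ALL_P the plain CCV*
   modes are used, which reflect whether all elements fulfill the relation;
   otherwise the corresponding *ANY modes are used.  The SImode 0/1 result
   is then produced by the conditional move emitted at the end.  */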
5841 void
5842 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
5843 rtx cmp1, rtx cmp2, bool all_p)
5844 {
5845 enum rtx_code new_code = code;
5846 machine_mode cmp_mode, full_cmp_mode, scratch_mode;
5847 rtx tmp_reg = gen_reg_rtx (SImode);
5848 bool swap_p = false;
5849
5850 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
5851 {
5852 switch (code)
5853 {
5854 case EQ: cmp_mode = CCVEQmode; break;
5855 case NE: cmp_mode = CCVEQmode; break;
5856 case GT: cmp_mode = CCVHmode; break;
5857 case GE: cmp_mode = CCVHmode; new_code = LE; swap_p = true; break;
5858 case LT: cmp_mode = CCVHmode; new_code = GT; swap_p = true; break;
5859 case LE: cmp_mode = CCVHmode; new_code = LE; break;
5860 case GTU: cmp_mode = CCVHUmode; break;
5861 case GEU: cmp_mode = CCVHUmode; new_code = LEU; swap_p = true; break;
5862 case LTU: cmp_mode = CCVHUmode; new_code = GTU; swap_p = true; break;
5863 case LEU: cmp_mode = CCVHUmode; new_code = LEU; break;
5864 default: gcc_unreachable ();
5865 }
5866 scratch_mode = GET_MODE (cmp1);
5867 }
5868 else if (GET_MODE (cmp1) == V2DFmode)
5869 {
5870 switch (code)
5871 {
5872 case EQ: cmp_mode = CCVEQmode; break;
5873 case NE: cmp_mode = CCVEQmode; break;
5874 case GT: cmp_mode = CCVFHmode; break;
5875 case GE: cmp_mode = CCVFHEmode; break;
5876 case UNLE: cmp_mode = CCVFHmode; break;
5877 case UNLT: cmp_mode = CCVFHEmode; break;
5878 case LT: cmp_mode = CCVFHmode; new_code = GT; swap_p = true; break;
5879 case LE: cmp_mode = CCVFHEmode; new_code = GE; swap_p = true; break;
5880 default: gcc_unreachable ();
5881 }
5882 scratch_mode = V2DImode;
5883 }
5884 else
5885 gcc_unreachable ();
5886
5887 if (!all_p)
5888 switch (cmp_mode)
5889 {
5890 case CCVEQmode: full_cmp_mode = CCVEQANYmode; break;
5891 case CCVHmode: full_cmp_mode = CCVHANYmode; break;
5892 case CCVHUmode: full_cmp_mode = CCVHUANYmode; break;
5893 case CCVFHmode: full_cmp_mode = CCVFHANYmode; break;
5894 case CCVFHEmode: full_cmp_mode = CCVFHEANYmode; break;
5895 default: gcc_unreachable ();
5896 }
5897 else
5898 /* The modes without ANY match the ALL modes. */
5899 full_cmp_mode = cmp_mode;
5900
5901 if (swap_p)
5902 {
5903 rtx tmp = cmp2;
5904 cmp2 = cmp1;
5905 cmp1 = tmp;
5906 }
5907
5908 emit_insn (gen_rtx_PARALLEL (VOIDmode,
5909 gen_rtvec (2, gen_rtx_SET (
5910 gen_rtx_REG (cmp_mode, CC_REGNUM),
5911 gen_rtx_COMPARE (cmp_mode, cmp1, cmp2)),
5912 gen_rtx_CLOBBER (VOIDmode,
5913 gen_rtx_SCRATCH (scratch_mode)))));
5914 emit_move_insn (target, const0_rtx);
5915 emit_move_insn (tmp_reg, const1_rtx);
5916
5917 emit_move_insn (target,
5918 gen_rtx_IF_THEN_ELSE (SImode,
5919 gen_rtx_fmt_ee (new_code, VOIDmode,
5920 gen_rtx_REG (full_cmp_mode, CC_REGNUM),
5921 const0_rtx),
5922 target, tmp_reg));
5923 }
5924
5925 /* Generate a vector comparison expression loading either elements of
5926 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
5927 and CMP_OP2. */
5928
5929 void
5930 s390_expand_vcond (rtx target, rtx then, rtx els,
5931 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
5932 {
5933 rtx tmp;
5934 machine_mode result_mode;
5935 rtx result_target;
5936
5937 /* We always use an integral type vector to hold the comparison
5938 result. */
5939 result_mode = GET_MODE (cmp_op1) == V2DFmode ? V2DImode : GET_MODE (cmp_op1);
5940 result_target = gen_reg_rtx (result_mode);
5941
5942 /* Alternatively this could be done by reload by lowering the cmp*
5943 predicates. But it appears to be better for scheduling etc. to
5944 have that done early. */
5945 if (!REG_P (cmp_op1))
5946 cmp_op1 = force_reg (GET_MODE (target), cmp_op1);
5947
5948 if (!REG_P (cmp_op2))
5949 cmp_op2 = force_reg (GET_MODE (target), cmp_op2);
5950
5951 s390_expand_vec_compare (result_target, cond,
5952 cmp_op1, cmp_op2);
5953
5954 /* If the results are supposed to be either -1 or 0 we are done
5955 since this is what our compare instructions generate anyway. */
5956 if (constm1_operand (then, GET_MODE (then))
5957 && const0_operand (els, GET_MODE (els)))
5958 {
5959 emit_move_insn (target, gen_rtx_SUBREG (GET_MODE (target),
5960 result_target, 0));
5961 return;
5962 }
5963
5964 /* Otherwise we will do a vsel afterwards. */
5965 /* This gets triggered e.g.
5966 with gcc.c-torture/compile/pr53410-1.c */
5967 if (!REG_P (then))
5968 then = force_reg (GET_MODE (target), then);
5969
5970 if (!REG_P (els))
5971 els = force_reg (GET_MODE (target), els);
5972
5973 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
5974 result_target,
5975 CONST0_RTX (result_mode));
5976
5977 /* We compared the result against zero above so we have to swap then
5978 and els here. */
5979 tmp = gen_rtx_IF_THEN_ELSE (GET_MODE (target), tmp, els, then);
5980
5981 gcc_assert (GET_MODE (target) == GET_MODE (then));
5982 emit_insn (gen_rtx_SET (target, tmp));
5983 }
5984
5985 /* Emit the RTX necessary to initialize the vector TARGET with values
5986 in VALS. */
5987 void
5988 s390_expand_vec_init (rtx target, rtx vals)
5989 {
5990 machine_mode mode = GET_MODE (target);
5991 machine_mode inner_mode = GET_MODE_INNER (mode);
5992 int n_elts = GET_MODE_NUNITS (mode);
5993 bool all_same = true, all_regs = true, all_const_int = true;
5994 rtx x;
5995 int i;
5996
5997 for (i = 0; i < n_elts; ++i)
5998 {
5999 x = XVECEXP (vals, 0, i);
6000
6001 if (!CONST_INT_P (x))
6002 all_const_int = false;
6003
6004 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6005 all_same = false;
6006
6007 if (!REG_P (x))
6008 all_regs = false;
6009 }
6010
6011 /* Use vector gen mask or vector gen byte mask if possible. */
6012 if (all_same && all_const_int
6013 && (XVECEXP (vals, 0, 0) == const0_rtx
6014 || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6015 NULL, NULL)
6016 || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6017 {
6018 emit_insn (gen_rtx_SET (target,
6019 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6020 return;
6021 }
6022
6023 if (all_same)
6024 {
6025 emit_insn (gen_rtx_SET (target,
6026 gen_rtx_VEC_DUPLICATE (mode,
6027 XVECEXP (vals, 0, 0))));
6028 return;
6029 }
6030
6031 if (all_regs && REG_P (target) && n_elts == 2 && inner_mode == DImode)
6032 {
6033 /* Use vector load pair. */
6034 emit_insn (gen_rtx_SET (target,
6035 gen_rtx_VEC_CONCAT (mode,
6036 XVECEXP (vals, 0, 0),
6037 XVECEXP (vals, 0, 1))));
6038 return;
6039 }
6040
6041 /* We are about to set the vector elements one by one. Zero out the
6042 full register first in order to help the data flow framework to
6043 detect it as a full VR set. */
6044 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6045
6046 /* Unfortunately the vec_init expander is not allowed to fail. So
6047 we have to implement the fallback ourselves. */
6048 for (i = 0; i < n_elts; i++)
6049 emit_insn (gen_rtx_SET (target,
6050 gen_rtx_UNSPEC (mode,
6051 gen_rtvec (3, XVECEXP (vals, 0, i),
6052 GEN_INT (i), target),
6053 UNSPEC_VEC_SET)));
6054 }
6055
6056 /* Structure to hold the initial parameters for a compare_and_swap operation
6057 in HImode and QImode. */
6058
6059 struct alignment_context
6060 {
6061 rtx memsi; /* SI aligned memory location. */
6062 rtx shift; /* Bit offset with regard to lsb. */
6063 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
6064 rtx modemaski; /* ~modemask */
6065 bool aligned; /* True if memory is aligned, false otherwise. */
6066 };
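/* Illustrative example: for an SImode-aligned HImode access, SHIFT ends up
   as 16, MODEMASK as 0xffff0000 and MODEMASKI as 0x0000ffff, i.e. the
   halfword occupies the most significant half of MEMSI.  */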
6067
6068 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6069 structure AC for transparent simplification, if the memory alignment is known
6070 to be at least 32 bit. MEM is the memory location for the actual operation
6071 and MODE its mode. */
6072
6073 static void
6074 init_alignment_context (struct alignment_context *ac, rtx mem,
6075 machine_mode mode)
6076 {
6077 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6078 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6079
6080 if (ac->aligned)
6081 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
6082 else
6083 {
6084 /* Alignment is unknown. */
6085 rtx byteoffset, addr, align;
6086
6087 /* Force the address into a register. */
6088 addr = force_reg (Pmode, XEXP (mem, 0));
6089
6090 /* Align it to SImode. */
6091 align = expand_simple_binop (Pmode, AND, addr,
6092 GEN_INT (-GET_MODE_SIZE (SImode)),
6093 NULL_RTX, 1, OPTAB_DIRECT);
6094 /* Generate MEM. */
6095 ac->memsi = gen_rtx_MEM (SImode, align);
6096 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6097 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6098 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6099
6100 /* Calculate shiftcount. */
6101 byteoffset = expand_simple_binop (Pmode, AND, addr,
6102 GEN_INT (GET_MODE_SIZE (SImode) - 1),
6103 NULL_RTX, 1, OPTAB_DIRECT);
6104 /* As we already have some offset, evaluate the remaining distance. */
6105 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6106 NULL_RTX, 1, OPTAB_DIRECT);
6107 }
6108
6109 /* Shift is the byte count, but we need the bitcount. */
6110 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6111 NULL_RTX, 1, OPTAB_DIRECT);
6112
6113 /* Calculate masks. */
6114 ac->modemask = expand_simple_binop (SImode, ASHIFT,
6115 GEN_INT (GET_MODE_MASK (mode)),
6116 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6117 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6118 NULL_RTX, 1);
6119 }
6120
6121 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
6122 emit a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
6123 perform the merge in SEQ2. */
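/* The returned register holds VAL with INS merged in at the field position
   described by AC; insns in SEQ1 (if any) can be emitted once outside the
   CS loop, those in SEQ2 must be emitted inside it.  */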
6124
6125 static rtx
6126 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6127 machine_mode mode, rtx val, rtx ins)
6128 {
6129 rtx tmp;
6130
6131 if (ac->aligned)
6132 {
6133 start_sequence ();
6134 tmp = copy_to_mode_reg (SImode, val);
6135 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
6136 const0_rtx, ins))
6137 {
6138 *seq1 = NULL;
6139 *seq2 = get_insns ();
6140 end_sequence ();
6141 return tmp;
6142 }
6143 end_sequence ();
6144 }
6145
6146 /* Failed to use insv. Generate a two part shift and mask. */
6147 start_sequence ();
6148 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
6149 *seq1 = get_insns ();
6150 end_sequence ();
6151
6152 start_sequence ();
6153 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
6154 *seq2 = get_insns ();
6155 end_sequence ();
6156
6157 return tmp;
6158 }
6159
6160 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
6161 the memory location, CMP the old value to compare MEM with and NEW_RTX the
6162 value to set if CMP == MEM. */
6163
6164 void
6165 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6166 rtx cmp, rtx new_rtx, bool is_weak)
6167 {
6168 struct alignment_context ac;
6169 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
6170 rtx res = gen_reg_rtx (SImode);
6171 rtx_code_label *csloop = NULL, *csend = NULL;
6172
6173 gcc_assert (MEM_P (mem));
6174
6175 init_alignment_context (&ac, mem, mode);
6176
6177 /* Load full word. Subsequent loads are performed by CS. */
6178 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
6179 NULL_RTX, 1, OPTAB_DIRECT);
6180
6181 /* Prepare insertions of cmp and new_rtx into the loaded value. When
6182 possible, we try to use insv to make this happen efficiently. If
6183 that fails we'll generate code both inside and outside the loop. */
6184 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
6185 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
6186
6187 if (seq0)
6188 emit_insn (seq0);
6189 if (seq1)
6190 emit_insn (seq1);
6191
6192 /* Start CS loop. */
6193 if (!is_weak)
6194 {
6195 /* Begin assuming success. */
6196 emit_move_insn (btarget, const1_rtx);
6197
6198 csloop = gen_label_rtx ();
6199 csend = gen_label_rtx ();
6200 emit_label (csloop);
6201 }
6202
6203 /* val = "<mem>00..0<mem>"
6204 * cmp = "00..0<cmp>00..0"
6205 * new = "00..0<new>00..0"
6206 */
6207
6208 emit_insn (seq2);
6209 emit_insn (seq3);
6210
6211 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
6212 if (is_weak)
6213 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
6214 else
6215 {
6216 rtx tmp;
6217
6218 /* Jump to end if we're done (likely?). */
6219 s390_emit_jump (csend, cc);
6220
6221 /* Check for changes outside the mode, and loop back if so.
6222 Arrange the moves so that the compare is adjacent to the
6223 branch so that we can generate CRJ. */
6224 tmp = copy_to_reg (val);
6225 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
6226 1, OPTAB_DIRECT);
6227 cc = s390_emit_compare (NE, val, tmp);
6228 s390_emit_jump (csloop, cc);
6229
6230 /* Failed. */
6231 emit_move_insn (btarget, const0_rtx);
6232 emit_label (csend);
6233 }
6234
6235 /* Return the correct part of the bitfield. */
6236 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
6237 NULL_RTX, 1, OPTAB_DIRECT), 1);
6238 }
6239
6240 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
6241 and VAL the value to play with. If AFTER is true then store the value
6242 MEM holds after the operation, if AFTER is false then store the value MEM
6243 holds before the operation. If TARGET is zero then discard that value, else
6244 store it to TARGET. */
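/* The operation is implemented as a COMPARE AND SWAP loop on the containing
   SImode word: load the word, compute the updated word with VAL patched in
   at the proper bit position, and retry until CS succeeds because no other
   CPU changed the word in the meantime.  */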
6245
6246 void
6247 s390_expand_atomic (machine_mode mode, enum rtx_code code,
6248 rtx target, rtx mem, rtx val, bool after)
6249 {
6250 struct alignment_context ac;
6251 rtx cmp;
6252 rtx new_rtx = gen_reg_rtx (SImode);
6253 rtx orig = gen_reg_rtx (SImode);
6254 rtx_code_label *csloop = gen_label_rtx ();
6255
6256 gcc_assert (!target || register_operand (target, VOIDmode));
6257 gcc_assert (MEM_P (mem));
6258
6259 init_alignment_context (&ac, mem, mode);
6260
6261 /* Shift val to the correct bit positions.
6262 Preserve "icm", but prevent "ex icm". */
6263 if (!(ac.aligned && code == SET && MEM_P (val)))
6264 val = s390_expand_mask_and_shift (val, mode, ac.shift);
6265
6266 /* Further preparation insns. */
6267 if (code == PLUS || code == MINUS)
6268 emit_move_insn (orig, val);
6269 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
6270 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
6271 NULL_RTX, 1, OPTAB_DIRECT);
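/* With the bits outside the field set to 1, the AND/NAND below leaves the
   rest of the containing word unchanged.  */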
6272
6273 /* Load full word. Subsequent loads are performed by CS. */
6274 cmp = force_reg (SImode, ac.memsi);
6275
6276 /* Start CS loop. */
6277 emit_label (csloop);
6278 emit_move_insn (new_rtx, cmp);
6279
6280 /* Patch new with val at correct position. */
6281 switch (code)
6282 {
6283 case PLUS:
6284 case MINUS:
6285 val = expand_simple_binop (SImode, code, new_rtx, orig,
6286 NULL_RTX, 1, OPTAB_DIRECT);
6287 val = expand_simple_binop (SImode, AND, val, ac.modemask,
6288 NULL_RTX, 1, OPTAB_DIRECT);
6289 /* FALLTHRU */
6290 case SET:
6291 if (ac.aligned && MEM_P (val))
6292 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
6293 0, 0, SImode, val);
6294 else
6295 {
6296 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
6297 NULL_RTX, 1, OPTAB_DIRECT);
6298 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
6299 NULL_RTX, 1, OPTAB_DIRECT);
6300 }
6301 break;
6302 case AND:
6303 case IOR:
6304 case XOR:
6305 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
6306 NULL_RTX, 1, OPTAB_DIRECT);
6307 break;
6308 case MULT: /* NAND */
6309 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
6310 NULL_RTX, 1, OPTAB_DIRECT);
6311 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
6312 NULL_RTX, 1, OPTAB_DIRECT);
6313 break;
6314 default:
6315 gcc_unreachable ();
6316 }
6317
6318 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
6319 ac.memsi, cmp, new_rtx));
6320
6321 /* Return the correct part of the bitfield. */
6322 if (target)
6323 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
6324 after ? new_rtx : cmp, ac.shift,
6325 NULL_RTX, 1, OPTAB_DIRECT), 1);
6326 }
6327
6328 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6329 We need to emit DTP-relative relocations. */
6330
6331 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
6332
6333 static void
6334 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
6335 {
6336 switch (size)
6337 {
6338 case 4:
6339 fputs ("\t.long\t", file);
6340 break;
6341 case 8:
6342 fputs ("\t.quad\t", file);
6343 break;
6344 default:
6345 gcc_unreachable ();
6346 }
6347 output_addr_const (file, x);
6348 fputs ("@DTPOFF", file);
6349 }
6350
6351 /* Return the proper mode for REGNO being represented in the dwarf
6352 unwind table. */
6353 machine_mode
6354 s390_dwarf_frame_reg_mode (int regno)
6355 {
6356 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
6357
6358 /* The rightmost 64 bits of vector registers are call-clobbered. */
6359 if (GET_MODE_SIZE (save_mode) > 8)
6360 save_mode = DImode;
6361
6362 return save_mode;
6363 }
6364
6365 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
6366 /* Implement TARGET_MANGLE_TYPE. */
6367
6368 static const char *
6369 s390_mangle_type (const_tree type)
6370 {
6371 type = TYPE_MAIN_VARIANT (type);
6372
6373 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
6374 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
6375 return NULL;
6376
6377 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
6378 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
6379 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
6380 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
6381
6382 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
6383 && TARGET_LONG_DOUBLE_128)
6384 return "g";
6385
6386 /* For all other types, use normal C++ mangling. */
6387 return NULL;
6388 }
6389 #endif
6390
6391 /* In the name of slightly smaller debug output, and to cater to
6392 general assembler lossage, recognize various UNSPEC sequences
6393 and turn them back into a direct symbol reference. */
6394
6395 static rtx
6396 s390_delegitimize_address (rtx orig_x)
6397 {
6398 rtx x, y;
6399
6400 orig_x = delegitimize_mem_from_attrs (orig_x);
6401 x = orig_x;
6402
6403 /* Extract the symbol ref from:
6404 (plus:SI (reg:SI 12 %r12)
6405 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
6406 UNSPEC_GOTOFF/PLTOFF)))
6407 and
6408 (plus:SI (reg:SI 12 %r12)
6409 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
6410 UNSPEC_GOTOFF/PLTOFF)
6411 (const_int 4 [0x4])))) */
6412 if (GET_CODE (x) == PLUS
6413 && REG_P (XEXP (x, 0))
6414 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
6415 && GET_CODE (XEXP (x, 1)) == CONST)
6416 {
6417 HOST_WIDE_INT offset = 0;
6418
6419 /* The const operand. */
6420 y = XEXP (XEXP (x, 1), 0);
6421
6422 if (GET_CODE (y) == PLUS
6423 && GET_CODE (XEXP (y, 1)) == CONST_INT)
6424 {
6425 offset = INTVAL (XEXP (y, 1));
6426 y = XEXP (y, 0);
6427 }
6428
6429 if (GET_CODE (y) == UNSPEC
6430 && (XINT (y, 1) == UNSPEC_GOTOFF
6431 || XINT (y, 1) == UNSPEC_PLTOFF))
6432 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
6433 }
6434
6435 if (GET_CODE (x) != MEM)
6436 return orig_x;
6437
6438 x = XEXP (x, 0);
6439 if (GET_CODE (x) == PLUS
6440 && GET_CODE (XEXP (x, 1)) == CONST
6441 && GET_CODE (XEXP (x, 0)) == REG
6442 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6443 {
6444 y = XEXP (XEXP (x, 1), 0);
6445 if (GET_CODE (y) == UNSPEC
6446 && XINT (y, 1) == UNSPEC_GOT)
6447 y = XVECEXP (y, 0, 0);
6448 else
6449 return orig_x;
6450 }
6451 else if (GET_CODE (x) == CONST)
6452 {
6453 /* Extract the symbol ref from:
6454 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
6455 UNSPEC_PLT/GOTENT))) */
6456
6457 y = XEXP (x, 0);
6458 if (GET_CODE (y) == UNSPEC
6459 && (XINT (y, 1) == UNSPEC_GOTENT
6460 || XINT (y, 1) == UNSPEC_PLT))
6461 y = XVECEXP (y, 0, 0);
6462 else
6463 return orig_x;
6464 }
6465 else
6466 return orig_x;
6467
6468 if (GET_MODE (orig_x) != Pmode)
6469 {
6470 if (GET_MODE (orig_x) == BLKmode)
6471 return orig_x;
6472 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
6473 if (y == NULL_RTX)
6474 return orig_x;
6475 }
6476 return y;
6477 }
6478
6479 /* Output operand OP to stdio stream FILE.
6480 OP is an address (register + offset) which is not used to address data;
6481 instead the rightmost bits are interpreted as the value. */
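/* E.g. for (plus (reg %r2) (const_int 46)) this prints "46(%r2)"; only the
   low 12 bits of the offset are used.  */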
6482
6483 static void
6484 print_shift_count_operand (FILE *file, rtx op)
6485 {
6486 HOST_WIDE_INT offset;
6487 rtx base;
6488
6489 /* Extract base register and offset. */
6490 if (!s390_decompose_shift_count (op, &base, &offset))
6491 gcc_unreachable ();
6492
6493 /* Sanity check. */
6494 if (base)
6495 {
6496 gcc_assert (GET_CODE (base) == REG);
6497 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
6498 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
6499 }
6500
6501 /* Offsets are restricted to twelve bits. */
6502 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
6503 if (base)
6504 fprintf (file, "(%s)", reg_names[REGNO (base)]);
6505 }
6506
6507 /* Assigns the number of NOP halfwords to be emitted before and after the
6508 function label to *HW_BEFORE and *HW_AFTER. Neither pointer may be NULL.
6509 If hotpatching is disabled for the function, the values are set to zero.
6510 */
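/* For example (illustrative), a function declared with
   __attribute__ ((hotpatch (1, 2))) requests one halfword of NOPs before
   and two halfwords after its label, overriding the -mhotpatch= command
   line values.  */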
6511
6512 static void
6513 s390_function_num_hotpatch_hw (tree decl,
6514 int *hw_before,
6515 int *hw_after)
6516 {
6517 tree attr;
6518
6519 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
6520
6521 /* Handle the arguments of the hotpatch attribute. The values
6522 specified via attribute might override the cmdline argument
6523 values. */
6524 if (attr)
6525 {
6526 tree args = TREE_VALUE (attr);
6527
6528 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
6529 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
6530 }
6531 else
6532 {
6533 /* Use the values specified by the cmdline arguments. */
6534 *hw_before = s390_hotpatch_hw_before_label;
6535 *hw_after = s390_hotpatch_hw_after_label;
6536 }
6537 }
6538
6539 /* Write the extra assembler code needed to declare a function properly. */
6540
6541 void
6542 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
6543 tree decl)
6544 {
6545 int hw_before, hw_after;
6546
6547 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
6548 if (hw_before > 0)
6549 {
6550 unsigned int function_alignment;
6551 int i;
6552
6553 /* Add a trampoline code area before the function label and initialize it
6554 with two-byte nop instructions. This area can be overwritten with code
6555 that jumps to a patched version of the function. */
6556 asm_fprintf (asm_out_file, "\tnopr\t%%r7"
6557 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
6558 hw_before);
6559 for (i = 1; i < hw_before; i++)
6560 fputs ("\tnopr\t%r7\n", asm_out_file);
6561
6562 /* Note: The function label must be aligned so that (a) the bytes of the
6563 following nop do not cross a cacheline boundary, and (b) a jump address
6564 (eight bytes for 64-bit targets, four bytes for 32-bit targets) can be
6565 stored directly before the label without crossing a cacheline
6566 boundary. All this is necessary to make sure the trampoline code can
6567 be changed atomically.
6568 This alignment is done automatically using FUNCTION_BOUNDARY, but
6569 if there are NOPs before the function label, the alignment is placed
6570 before them. So it is necessary to duplicate the alignment after the
6571 NOPs. */
6572 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
6573 if (! DECL_USER_ALIGN (decl))
6574 function_alignment = MAX (function_alignment,
6575 (unsigned int) align_functions);
6576 fputs ("\t# alignment for hotpatch\n", asm_out_file);
6577 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment));
6578 }
6579
6580 ASM_OUTPUT_LABEL (asm_out_file, fname);
6581 if (hw_after > 0)
6582 asm_fprintf (asm_out_file,
6583 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
6584 hw_after);
6585 }
6586
6587 /* Output machine-dependent UNSPECs occurring in address constant X
6588 in assembler syntax to stdio stream FILE. Returns true if the
6589 constant X could be recognized, false otherwise. */
6590
6591 static bool
6592 s390_output_addr_const_extra (FILE *file, rtx x)
6593 {
6594 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
6595 switch (XINT (x, 1))
6596 {
6597 case UNSPEC_GOTENT:
6598 output_addr_const (file, XVECEXP (x, 0, 0));
6599 fprintf (file, "@GOTENT");
6600 return true;
6601 case UNSPEC_GOT:
6602 output_addr_const (file, XVECEXP (x, 0, 0));
6603 fprintf (file, "@GOT");
6604 return true;
6605 case UNSPEC_GOTOFF:
6606 output_addr_const (file, XVECEXP (x, 0, 0));
6607 fprintf (file, "@GOTOFF");
6608 return true;
6609 case UNSPEC_PLT:
6610 output_addr_const (file, XVECEXP (x, 0, 0));
6611 fprintf (file, "@PLT");
6612 return true;
6613 case UNSPEC_PLTOFF:
6614 output_addr_const (file, XVECEXP (x, 0, 0));
6615 fprintf (file, "@PLTOFF");
6616 return true;
6617 case UNSPEC_TLSGD:
6618 output_addr_const (file, XVECEXP (x, 0, 0));
6619 fprintf (file, "@TLSGD");
6620 return true;
6621 case UNSPEC_TLSLDM:
6622 assemble_name (file, get_some_local_dynamic_name ());
6623 fprintf (file, "@TLSLDM");
6624 return true;
6625 case UNSPEC_DTPOFF:
6626 output_addr_const (file, XVECEXP (x, 0, 0));
6627 fprintf (file, "@DTPOFF");
6628 return true;
6629 case UNSPEC_NTPOFF:
6630 output_addr_const (file, XVECEXP (x, 0, 0));
6631 fprintf (file, "@NTPOFF");
6632 return true;
6633 case UNSPEC_GOTNTPOFF:
6634 output_addr_const (file, XVECEXP (x, 0, 0));
6635 fprintf (file, "@GOTNTPOFF");
6636 return true;
6637 case UNSPEC_INDNTPOFF:
6638 output_addr_const (file, XVECEXP (x, 0, 0));
6639 fprintf (file, "@INDNTPOFF");
6640 return true;
6641 }
6642
6643 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
6644 switch (XINT (x, 1))
6645 {
6646 case UNSPEC_POOL_OFFSET:
6647 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
6648 output_addr_const (file, x);
6649 return true;
6650 }
6651 return false;
6652 }
6653
6654 /* Output address operand ADDR in assembler syntax to
6655 stdio stream FILE. */
6656
6657 void
6658 print_operand_address (FILE *file, rtx addr)
6659 {
6660 struct s390_address ad;
6661
6662 if (s390_loadrelative_operand_p (addr, NULL, NULL))
6663 {
6664 if (!TARGET_Z10)
6665 {
6666 output_operand_lossage ("symbolic memory references are "
6667 "only supported on z10 or later");
6668 return;
6669 }
6670 output_addr_const (file, addr);
6671 return;
6672 }
6673
6674 if (!s390_decompose_address (addr, &ad)
6675 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
6676 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
6677 output_operand_lossage ("cannot decompose address");
6678
6679 if (ad.disp)
6680 output_addr_const (file, ad.disp);
6681 else
6682 fprintf (file, "0");
6683
6684 if (ad.base && ad.indx)
6685 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
6686 reg_names[REGNO (ad.base)]);
6687 else if (ad.base)
6688 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
6689 }
6690
6691 /* Output operand X in assembler syntax to stdio stream FILE.
6692 CODE specified the format flag. The following format flags
6693 are recognized:
6694
6695 'C': print opcode suffix for branch condition.
6696 'D': print opcode suffix for inverse branch condition.
6697 'E': print opcode suffix for branch on index instruction.
6698 'G': print the size of the operand in bytes.
6699 'J': print tls_load/tls_gdcall/tls_ldcall suffix
6700 'M': print the second word of a TImode operand.
6701 'N': print the second word of a DImode operand.
6702 'O': print only the displacement of a memory reference or address.
6703 'R': print only the base register of a memory reference or address.
6704 'S': print S-type memory reference (base+displacement).
6705 'Y': print shift count operand.
6706
6707 'b': print integer X as if it's an unsigned byte.
6708 'c': print integer X as if it's a signed byte.
6709 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
6710 'f': "end" contiguous bitmask X in SImode.
6711 'h': print integer X as if it's a signed halfword.
6712 'i': print the first nonzero HImode part of X.
6713 'j': print the first HImode part unequal to -1 of X.
6714 'k': print the first nonzero SImode part of X.
6715 'm': print the first SImode part unequal to -1 of X.
6716 'o': print integer X as if it's an unsigned 32-bit word.
6717 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
6718 't': CONST_INT: "start" of contiguous bitmask X in SImode.
6719 CONST_VECTOR: Generate a bitmask for vgbm instruction.
6720 'x': print integer X as if it's an unsigned halfword.
6721 'v': print register number as vector register (v1 instead of f1).
6722 */
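/* For instance (illustrative only; the authoritative uses are the insn
   templates in s390.md), "%Y1" prints a shift count operand and
   "%O0(%R0)" prints the displacement and base register of a memory
   operand separately.  */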
6723
6724 void
6725 print_operand (FILE *file, rtx x, int code)
6726 {
6727 HOST_WIDE_INT ival;
6728
6729 switch (code)
6730 {
6731 case 'C':
6732 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
6733 return;
6734
6735 case 'D':
6736 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
6737 return;
6738
6739 case 'E':
6740 if (GET_CODE (x) == LE)
6741 fprintf (file, "l");
6742 else if (GET_CODE (x) == GT)
6743 fprintf (file, "h");
6744 else
6745 output_operand_lossage ("invalid comparison operator "
6746 "for 'E' output modifier");
6747 return;
6748
6749 case 'J':
6750 if (GET_CODE (x) == SYMBOL_REF)
6751 {
6752 fprintf (file, "%s", ":tls_load:");
6753 output_addr_const (file, x);
6754 }
6755 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
6756 {
6757 fprintf (file, "%s", ":tls_gdcall:");
6758 output_addr_const (file, XVECEXP (x, 0, 0));
6759 }
6760 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
6761 {
6762 fprintf (file, "%s", ":tls_ldcall:");
6763 const char *name = get_some_local_dynamic_name ();
6764 gcc_assert (name);
6765 assemble_name (file, name);
6766 }
6767 else
6768 output_operand_lossage ("invalid reference for 'J' output modifier");
6769 return;
6770
6771 case 'G':
6772 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
6773 return;
6774
6775 case 'O':
6776 {
6777 struct s390_address ad;
6778 int ret;
6779
6780 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
6781
6782 if (!ret
6783 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
6784 || ad.indx)
6785 {
6786 output_operand_lossage ("invalid address for 'O' output modifier");
6787 return;
6788 }
6789
6790 if (ad.disp)
6791 output_addr_const (file, ad.disp);
6792 else
6793 fprintf (file, "0");
6794 }
6795 return;
6796
6797 case 'R':
6798 {
6799 struct s390_address ad;
6800 int ret;
6801
6802 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
6803
6804 if (!ret
6805 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
6806 || ad.indx)
6807 {
6808 output_operand_lossage ("invalid address for 'R' output modifier");
6809 return;
6810 }
6811
6812 if (ad.base)
6813 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
6814 else
6815 fprintf (file, "0");
6816 }
6817 return;
6818
6819 case 'S':
6820 {
6821 struct s390_address ad;
6822 int ret;
6823
6824 if (!MEM_P (x))
6825 {
6826 output_operand_lossage ("memory reference expected for "
6827 "'S' output modifier");
6828 return;
6829 }
6830 ret = s390_decompose_address (XEXP (x, 0), &ad);
6831
6832 if (!ret
6833 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
6834 || ad.indx)
6835 {
6836 output_operand_lossage ("invalid address for 'S' output modifier");
6837 return;
6838 }
6839
6840 if (ad.disp)
6841 output_addr_const (file, ad.disp);
6842 else
6843 fprintf (file, "0");
6844
6845 if (ad.base)
6846 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
6847 }
6848 return;
6849
6850 case 'N':
6851 if (GET_CODE (x) == REG)
6852 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
6853 else if (GET_CODE (x) == MEM)
6854 x = change_address (x, VOIDmode,
6855 plus_constant (Pmode, XEXP (x, 0), 4));
6856 else
6857 output_operand_lossage ("register or memory expression expected "
6858 "for 'N' output modifier");
6859 break;
6860
6861 case 'M':
6862 if (GET_CODE (x) == REG)
6863 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
6864 else if (GET_CODE (x) == MEM)
6865 x = change_address (x, VOIDmode,
6866 plus_constant (Pmode, XEXP (x, 0), 8));
6867 else
6868 output_operand_lossage ("register or memory expression expected "
6869 "for 'M' output modifier");
6870 break;
6871
6872 case 'Y':
6873 print_shift_count_operand (file, x);
6874 return;
6875 }
6876
6877 switch (GET_CODE (x))
6878 {
6879 case REG:
6880 /* Print FP regs as fx instead of vx when they are accessed
6881 through non-vector mode. */
6882 if (code == 'v'
6883 || VECTOR_NOFP_REG_P (x)
6884 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
6885 || (VECTOR_REG_P (x)
6886 && (GET_MODE_SIZE (GET_MODE (x)) /
6887 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
6888 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
6889 else
6890 fprintf (file, "%s", reg_names[REGNO (x)]);
6891 break;
6892
6893 case MEM:
6894 output_address (XEXP (x, 0));
6895 break;
6896
6897 case CONST:
6898 case CODE_LABEL:
6899 case LABEL_REF:
6900 case SYMBOL_REF:
6901 output_addr_const (file, x);
6902 break;
6903
6904 case CONST_INT:
6905 ival = INTVAL (x);
6906 switch (code)
6907 {
6908 case 0:
6909 break;
6910 case 'b':
6911 ival &= 0xff;
6912 break;
6913 case 'c':
6914 ival = ((ival & 0xff) ^ 0x80) - 0x80;
6915 break;
6916 case 'x':
6917 ival &= 0xffff;
6918 break;
6919 case 'h':
6920 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
6921 break;
6922 case 'i':
6923 ival = s390_extract_part (x, HImode, 0);
6924 break;
6925 case 'j':
6926 ival = s390_extract_part (x, HImode, -1);
6927 break;
6928 case 'k':
6929 ival = s390_extract_part (x, SImode, 0);
6930 break;
6931 case 'm':
6932 ival = s390_extract_part (x, SImode, -1);
6933 break;
6934 case 'o':
6935 ival &= 0xffffffff;
6936 break;
6937 case 'e': case 'f':
6938 case 's': case 't':
6939 {
6940 int pos, len;
6941 bool ok;
6942
6943 len = (code == 's' || code == 'e' ? 64 : 32);
6944 ok = s390_contiguous_bitmask_p (ival, len, &pos, &len);
6945 gcc_assert (ok);
6946 if (code == 's' || code == 't')
6947 ival = 64 - pos - len;
6948 else
6949 ival = 64 - 1 - pos;
6950 }
6951 break;
6952 default:
6953 output_operand_lossage ("invalid constant for output modifier '%c'", code);
6954 }
6955 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
6956 break;
6957
6958 case CONST_DOUBLE:
6959 gcc_assert (GET_MODE (x) == VOIDmode);
6960 if (code == 'b')
6961 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xff);
6962 else if (code == 'x')
6963 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xffff);
6964 else if (code == 'h')
6965 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
6966 ((CONST_DOUBLE_LOW (x) & 0xffff) ^ 0x8000) - 0x8000);
6967 else
6968 {
6969 if (code == 0)
6970 output_operand_lossage ("invalid constant - try using "
6971 "an output modifier");
6972 else
6973 output_operand_lossage ("invalid constant for output modifier '%c'",
6974 code);
6975 }
6976 break;
6977 case CONST_VECTOR:
6978 switch (code)
6979 {
6980 case 'e':
6981 case 's':
6982 {
6983 int start, stop, inner_len;
6984 bool ok;
6985
6986 inner_len = GET_MODE_UNIT_BITSIZE (GET_MODE (x));
6987 ok = s390_contiguous_bitmask_vector_p (x, &start, &stop);
6988 gcc_assert (ok);
6989 if (code == 's' || code == 't')
6990 ival = inner_len - stop - 1;
6991 else
6992 ival = inner_len - start - 1;
6993 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
6994 }
6995 break;
6996 case 't':
6997 {
6998 unsigned mask;
6999 bool ok = s390_bytemask_vector_p (x, &mask);
7000 gcc_assert (ok);
7001 fprintf (file, "%u", mask);
7002 }
7003 break;
7004
7005 default:
7006 output_operand_lossage ("invalid constant vector for output "
7007 "modifier '%c'", code);
7008 }
7009 break;
7010
7011 default:
7012 if (code == 0)
7013 output_operand_lossage ("invalid expression - try using "
7014 "an output modifier");
7015 else
7016 output_operand_lossage ("invalid expression for output "
7017 "modifier '%c'", code);
7018 break;
7019 }
7020 }
7021
7022 /* Target hook for assembling integer objects. We need to define it
7023 here to work around a bug in some versions of GAS, which couldn't
7024 handle values smaller than INT_MIN when printed in decimal. */
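/* E.g. (HOST_WIDE_INT) -2147483649 would then be emitted as
   .quad 0xffffffff7fffffff rather than in decimal.  */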
7025
7026 static bool
7027 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
7028 {
7029 if (size == 8 && aligned_p
7030 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
7031 {
7032 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
7033 INTVAL (x));
7034 return true;
7035 }
7036 return default_assemble_integer (x, size, aligned_p);
7037 }
7038
7039 /* Returns true if register REGNO is used for forming
7040 a memory address in expression X. */
7041
7042 static bool
7043 reg_used_in_mem_p (int regno, rtx x)
7044 {
7045 enum rtx_code code = GET_CODE (x);
7046 int i, j;
7047 const char *fmt;
7048
7049 if (code == MEM)
7050 {
7051 if (refers_to_regno_p (regno, XEXP (x, 0)))
7052 return true;
7053 }
7054 else if (code == SET
7055 && GET_CODE (SET_DEST (x)) == PC)
7056 {
7057 if (refers_to_regno_p (regno, SET_SRC (x)))
7058 return true;
7059 }
7060
7061 fmt = GET_RTX_FORMAT (code);
7062 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7063 {
7064 if (fmt[i] == 'e'
7065 && reg_used_in_mem_p (regno, XEXP (x, i)))
7066 return true;
7067
7068 else if (fmt[i] == 'E')
7069 for (j = 0; j < XVECLEN (x, i); j++)
7070 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
7071 return true;
7072 }
7073 return false;
7074 }
7075
7076 /* Returns true if expression DEP_RTX sets an address register
7077 used by instruction INSN to address memory. */
7078
7079 static bool
7080 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
7081 {
7082 rtx target, pat;
7083
7084 if (NONJUMP_INSN_P (dep_rtx))
7085 dep_rtx = PATTERN (dep_rtx);
7086
7087 if (GET_CODE (dep_rtx) == SET)
7088 {
7089 target = SET_DEST (dep_rtx);
7090 if (GET_CODE (target) == STRICT_LOW_PART)
7091 target = XEXP (target, 0);
7092 while (GET_CODE (target) == SUBREG)
7093 target = SUBREG_REG (target);
7094
7095 if (GET_CODE (target) == REG)
7096 {
7097 int regno = REGNO (target);
7098
7099 if (s390_safe_attr_type (insn) == TYPE_LA)
7100 {
7101 pat = PATTERN (insn);
7102 if (GET_CODE (pat) == PARALLEL)
7103 {
7104 gcc_assert (XVECLEN (pat, 0) == 2);
7105 pat = XVECEXP (pat, 0, 0);
7106 }
7107 gcc_assert (GET_CODE (pat) == SET);
7108 return refers_to_regno_p (regno, SET_SRC (pat));
7109 }
7110 else if (get_attr_atype (insn) == ATYPE_AGEN)
7111 return reg_used_in_mem_p (regno, PATTERN (insn));
7112 }
7113 }
7114 return false;
7115 }
7116
7117 /* Return 1 if DEP_INSN sets a register used by INSN in the agen unit. */
7118
7119 int
7120 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
7121 {
7122 rtx dep_rtx = PATTERN (dep_insn);
7123 int i;
7124
7125 if (GET_CODE (dep_rtx) == SET
7126 && addr_generation_dependency_p (dep_rtx, insn))
7127 return 1;
7128 else if (GET_CODE (dep_rtx) == PARALLEL)
7129 {
7130 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
7131 {
7132 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
7133 return 1;
7134 }
7135 }
7136 return 0;
7137 }
7138
7139
7140 /* Update the integer scheduling priority INSN_PRIORITY (INSN). Increase the
7141 priority to execute INSN earlier, reduce the priority to execute INSN
7142 later. This function implements the scheduler's adjust_priority hook
7143 for the s390 back end.
7144
7145 A STD instruction should be scheduled earlier,
7146 in order to use the bypass. */
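/* E.g. the priority of a floating point store is multiplied by 8 and that
   of a general store or STM by 2, making the scheduler prefer to issue
   them earlier.  */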
7147 static int
7148 s390_adjust_priority (rtx_insn *insn, int priority)
7149 {
7150 if (! INSN_P (insn))
7151 return priority;
7152
7153 if (s390_tune != PROCESSOR_2084_Z990
7154 && s390_tune != PROCESSOR_2094_Z9_109
7155 && s390_tune != PROCESSOR_2097_Z10
7156 && s390_tune != PROCESSOR_2817_Z196
7157 && s390_tune != PROCESSOR_2827_ZEC12
7158 && s390_tune != PROCESSOR_2964_Z13)
7159 return priority;
7160
7161 switch (s390_safe_attr_type (insn))
7162 {
7163 case TYPE_FSTOREDF:
7164 case TYPE_FSTORESF:
7165 priority = priority << 3;
7166 break;
7167 case TYPE_STORE:
7168 case TYPE_STM:
7169 priority = priority << 1;
7170 break;
7171 default:
7172 break;
7173 }
7174 return priority;
7175 }
7176
7177
7178 /* The number of instructions that can be issued per cycle. */
7179
7180 static int
7181 s390_issue_rate (void)
7182 {
7183 switch (s390_tune)
7184 {
7185 case PROCESSOR_2084_Z990:
7186 case PROCESSOR_2094_Z9_109:
7187 case PROCESSOR_2817_Z196:
7188 return 3;
7189 case PROCESSOR_2097_Z10:
7190 return 2;
7191 /* Starting with EC12 we use the sched_reorder hook to take care
7192 of instruction dispatch constraints. The algorithm only
7193 picks the best instruction and assumes only a single
7194 instruction gets issued per cycle. */
7195 case PROCESSOR_2827_ZEC12:
7196 default:
7197 return 1;
7198 }
7199 }
7200
7201 static int
7202 s390_first_cycle_multipass_dfa_lookahead (void)
7203 {
7204 return 4;
7205 }
7206
7207 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
7208 Fix up MEMs as required. */
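/* After annotation a literal pool reference has the form
   (unspec [symbol base] UNSPEC_LTREF), which makes the use of the literal
   pool base register explicit in the RTL.  */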
7209
7210 static void
7211 annotate_constant_pool_refs (rtx *x)
7212 {
7213 int i, j;
7214 const char *fmt;
7215
7216 gcc_assert (GET_CODE (*x) != SYMBOL_REF
7217 || !CONSTANT_POOL_ADDRESS_P (*x));
7218
7219 /* Literal pool references can only occur inside a MEM ... */
7220 if (GET_CODE (*x) == MEM)
7221 {
7222 rtx memref = XEXP (*x, 0);
7223
7224 if (GET_CODE (memref) == SYMBOL_REF
7225 && CONSTANT_POOL_ADDRESS_P (memref))
7226 {
7227 rtx base = cfun->machine->base_reg;
7228 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
7229 UNSPEC_LTREF);
7230
7231 *x = replace_equiv_address (*x, addr);
7232 return;
7233 }
7234
7235 if (GET_CODE (memref) == CONST
7236 && GET_CODE (XEXP (memref, 0)) == PLUS
7237 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
7238 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
7239 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
7240 {
7241 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
7242 rtx sym = XEXP (XEXP (memref, 0), 0);
7243 rtx base = cfun->machine->base_reg;
7244 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7245 UNSPEC_LTREF);
7246
7247 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
7248 return;
7249 }
7250 }
7251
7252 /* ... or a load-address type pattern. */
7253 if (GET_CODE (*x) == SET)
7254 {
7255 rtx addrref = SET_SRC (*x);
7256
7257 if (GET_CODE (addrref) == SYMBOL_REF
7258 && CONSTANT_POOL_ADDRESS_P (addrref))
7259 {
7260 rtx base = cfun->machine->base_reg;
7261 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
7262 UNSPEC_LTREF);
7263
7264 SET_SRC (*x) = addr;
7265 return;
7266 }
7267
7268 if (GET_CODE (addrref) == CONST
7269 && GET_CODE (XEXP (addrref, 0)) == PLUS
7270 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
7271 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
7272 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
7273 {
7274 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
7275 rtx sym = XEXP (XEXP (addrref, 0), 0);
7276 rtx base = cfun->machine->base_reg;
7277 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7278 UNSPEC_LTREF);
7279
7280 SET_SRC (*x) = plus_constant (Pmode, addr, off);
7281 return;
7282 }
7283 }
7284
7285 /* Annotate LTREL_BASE as well. */
7286 if (GET_CODE (*x) == UNSPEC
7287 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
7288 {
7289 rtx base = cfun->machine->base_reg;
7290 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
7291 UNSPEC_LTREL_BASE);
7292 return;
7293 }
7294
7295 fmt = GET_RTX_FORMAT (GET_CODE (*x));
7296 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7297 {
7298 if (fmt[i] == 'e')
7299 {
7300 annotate_constant_pool_refs (&XEXP (*x, i));
7301 }
7302 else if (fmt[i] == 'E')
7303 {
7304 for (j = 0; j < XVECLEN (*x, i); j++)
7305 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
7306 }
7307 }
7308 }
7309
7310 /* Split all branches that exceed the maximum distance.
7311 Returns true if this created a new literal pool entry. */
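/* Relative branch instructions only reach targets within +-64KB (a signed
   16-bit halfword offset); branches beyond that range are rewritten below
   to load the target address from the literal pool and branch
   indirectly.  */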
7312
7313 static int
7314 s390_split_branches (void)
7315 {
7316 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
7317 int new_literal = 0, ret;
7318 rtx_insn *insn;
7319 rtx pat, target;
7320 rtx *label;
7321
7322 /* We need correct insn addresses. */
7323
7324 shorten_branches (get_insns ());
7325
7326 /* Find all branches that exceed 64KB, and split them. */
7327
7328 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7329 {
7330 if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
7331 continue;
7332
7333 pat = PATTERN (insn);
7334 if (GET_CODE (pat) == PARALLEL)
7335 pat = XVECEXP (pat, 0, 0);
7336 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
7337 continue;
7338
7339 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
7340 {
7341 label = &SET_SRC (pat);
7342 }
7343 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
7344 {
7345 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
7346 label = &XEXP (SET_SRC (pat), 1);
7347 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
7348 label = &XEXP (SET_SRC (pat), 2);
7349 else
7350 continue;
7351 }
7352 else
7353 continue;
7354
7355 if (get_attr_length (insn) <= 4)
7356 continue;
7357
7358 /* We are going to use the return register as scratch register,
7359 make sure it will be saved/restored by the prologue/epilogue. */
7360 cfun_frame_layout.save_return_addr_p = 1;
7361
7362 if (!flag_pic)
7363 {
7364 new_literal = 1;
7365 rtx mem = force_const_mem (Pmode, *label);
7366 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem),
7367 insn);
7368 INSN_ADDRESSES_NEW (set_insn, -1);
7369 annotate_constant_pool_refs (&PATTERN (set_insn));
7370
7371 target = temp_reg;
7372 }
7373 else
7374 {
7375 new_literal = 1;
7376 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
7377 UNSPEC_LTREL_OFFSET);
7378 target = gen_rtx_CONST (Pmode, target);
7379 target = force_const_mem (Pmode, target);
7380 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target),
7381 insn);
7382 INSN_ADDRESSES_NEW (set_insn, -1);
7383 annotate_constant_pool_refs (&PATTERN (set_insn));
7384
7385 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
7386 cfun->machine->base_reg),
7387 UNSPEC_LTREL_BASE);
7388 target = gen_rtx_PLUS (Pmode, temp_reg, target);
7389 }
7390
7391 ret = validate_change (insn, label, target, 0);
7392 gcc_assert (ret);
7393 }
7394
7395 return new_literal;
7396 }
7397
7398
7399 /* Find an annotated literal pool symbol referenced in RTX X,
7400 and store it at REF. Will abort if X contains references to
7401 more than one such pool symbol; multiple references to the same
7402 symbol are allowed, however.
7403
7404 The rtx pointed to by REF must be initialized to NULL_RTX
7405 by the caller before calling this routine. */
7406
7407 static void
7408 find_constant_pool_ref (rtx x, rtx *ref)
7409 {
7410 int i, j;
7411 const char *fmt;
7412
7413 /* Ignore LTREL_BASE references. */
7414 if (GET_CODE (x) == UNSPEC
7415 && XINT (x, 1) == UNSPEC_LTREL_BASE)
7416 return;
7417 /* Likewise POOL_ENTRY insns. */
7418 if (GET_CODE (x) == UNSPEC_VOLATILE
7419 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
7420 return;
7421
7422 gcc_assert (GET_CODE (x) != SYMBOL_REF
7423 || !CONSTANT_POOL_ADDRESS_P (x));
7424
7425 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
7426 {
7427 rtx sym = XVECEXP (x, 0, 0);
7428 gcc_assert (GET_CODE (sym) == SYMBOL_REF
7429 && CONSTANT_POOL_ADDRESS_P (sym));
7430
7431 if (*ref == NULL_RTX)
7432 *ref = sym;
7433 else
7434 gcc_assert (*ref == sym);
7435
7436 return;
7437 }
7438
7439 fmt = GET_RTX_FORMAT (GET_CODE (x));
7440 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7441 {
7442 if (fmt[i] == 'e')
7443 {
7444 find_constant_pool_ref (XEXP (x, i), ref);
7445 }
7446 else if (fmt[i] == 'E')
7447 {
7448 for (j = 0; j < XVECLEN (x, i); j++)
7449 find_constant_pool_ref (XVECEXP (x, i, j), ref);
7450 }
7451 }
7452 }
7453
7454 /* Replace every reference to the annotated literal pool
7455 symbol REF in X by its base plus OFFSET. */
7456
7457 static void
7458 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
7459 {
7460 int i, j;
7461 const char *fmt;
7462
7463 gcc_assert (*x != ref);
7464
7465 if (GET_CODE (*x) == UNSPEC
7466 && XINT (*x, 1) == UNSPEC_LTREF
7467 && XVECEXP (*x, 0, 0) == ref)
7468 {
7469 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
7470 return;
7471 }
7472
7473 if (GET_CODE (*x) == PLUS
7474 && GET_CODE (XEXP (*x, 1)) == CONST_INT
7475 && GET_CODE (XEXP (*x, 0)) == UNSPEC
7476 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
7477 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
7478 {
7479 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
7480 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
7481 return;
7482 }
7483
7484 fmt = GET_RTX_FORMAT (GET_CODE (*x));
7485 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7486 {
7487 if (fmt[i] == 'e')
7488 {
7489 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
7490 }
7491 else if (fmt[i] == 'E')
7492 {
7493 for (j = 0; j < XVECLEN (*x, i); j++)
7494 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
7495 }
7496 }
7497 }
7498
7499 /* Check whether X contains an UNSPEC_LTREL_BASE.
7500 Return its constant pool symbol if found, NULL_RTX otherwise. */
7501
7502 static rtx
7503 find_ltrel_base (rtx x)
7504 {
7505 int i, j;
7506 const char *fmt;
7507
7508 if (GET_CODE (x) == UNSPEC
7509 && XINT (x, 1) == UNSPEC_LTREL_BASE)
7510 return XVECEXP (x, 0, 0);
7511
7512 fmt = GET_RTX_FORMAT (GET_CODE (x));
7513 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7514 {
7515 if (fmt[i] == 'e')
7516 {
7517 rtx fnd = find_ltrel_base (XEXP (x, i));
7518 if (fnd)
7519 return fnd;
7520 }
7521 else if (fmt[i] == 'E')
7522 {
7523 for (j = 0; j < XVECLEN (x, i); j++)
7524 {
7525 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
7526 if (fnd)
7527 return fnd;
7528 }
7529 }
7530 }
7531
7532 return NULL_RTX;
7533 }
7534
7535 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
7536
7537 static void
7538 replace_ltrel_base (rtx *x)
7539 {
7540 int i, j;
7541 const char *fmt;
7542
7543 if (GET_CODE (*x) == UNSPEC
7544 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
7545 {
7546 *x = XVECEXP (*x, 0, 1);
7547 return;
7548 }
7549
7550 fmt = GET_RTX_FORMAT (GET_CODE (*x));
7551 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7552 {
7553 if (fmt[i] == 'e')
7554 {
7555 replace_ltrel_base (&XEXP (*x, i));
7556 }
7557 else if (fmt[i] == 'E')
7558 {
7559 for (j = 0; j < XVECLEN (*x, i); j++)
7560 replace_ltrel_base (&XVECEXP (*x, i, j));
7561 }
7562 }
7563 }
7564
7565
7566 /* We keep a list of constants which we have to add to internal
7567 constant tables in the middle of large functions. */
7568
7569 #define NR_C_MODES 31
7570 machine_mode constant_modes[NR_C_MODES] =
7571 {
7572 TFmode, TImode, TDmode,
7573 V16QImode, V8HImode, V4SImode, V2DImode, V4SFmode, V2DFmode, V1TFmode,
7574 DFmode, DImode, DDmode,
7575 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
7576 SFmode, SImode, SDmode,
7577 V4QImode, V2HImode, V1SImode, V1SFmode,
7578 HImode,
7579 V2QImode, V1HImode,
7580 QImode,
7581 V1QImode
7582 };
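/* Note: the entries are ordered from larger to smaller modes, presumably so
   that pool entries with the strictest alignment requirements are emitted
   first.  */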
7583
7584 struct constant
7585 {
7586 struct constant *next;
7587 rtx value;
7588 rtx_code_label *label;
7589 };
7590
7591 struct constant_pool
7592 {
7593 struct constant_pool *next;
7594 rtx_insn *first_insn;
7595 rtx_insn *pool_insn;
7596 bitmap insns;
7597 rtx_insn *emit_pool_after;
7598
7599 struct constant *constants[NR_C_MODES];
7600 struct constant *execute;
7601 rtx_code_label *label;
7602 int size;
7603 };
7604
7605 /* Allocate new constant_pool structure. */
7606
7607 static struct constant_pool *
7608 s390_alloc_pool (void)
7609 {
7610 struct constant_pool *pool;
7611 int i;
7612
7613 pool = (struct constant_pool *) xmalloc (sizeof *pool);
7614 pool->next = NULL;
7615 for (i = 0; i < NR_C_MODES; i++)
7616 pool->constants[i] = NULL;
7617
7618 pool->execute = NULL;
7619 pool->label = gen_label_rtx ();
7620 pool->first_insn = NULL;
7621 pool->pool_insn = NULL;
7622 pool->insns = BITMAP_ALLOC (NULL);
7623 pool->size = 0;
7624 pool->emit_pool_after = NULL;
7625
7626 return pool;
7627 }
7628
7629 /* Create new constant pool covering instructions starting at INSN
7630 and chain it to the end of POOL_LIST. */
7631
7632 static struct constant_pool *
7633 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
7634 {
7635 struct constant_pool *pool, **prev;
7636
7637 pool = s390_alloc_pool ();
7638 pool->first_insn = insn;
7639
7640 for (prev = pool_list; *prev; prev = &(*prev)->next)
7641 ;
7642 *prev = pool;
7643
7644 return pool;
7645 }
7646
7647 /* End range of instructions covered by POOL at INSN and emit
7648 placeholder insn representing the pool. */
7649
7650 static void
7651 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
7652 {
7653 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
7654
7655 if (!insn)
7656 insn = get_last_insn ();
7657
7658 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
7659 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
7660 }
7661
7662 /* Add INSN to the list of insns covered by POOL. */
7663
7664 static void
7665 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
7666 {
7667 bitmap_set_bit (pool->insns, INSN_UID (insn));
7668 }
7669
7670 /* Return pool out of POOL_LIST that covers INSN. */
7671
7672 static struct constant_pool *
7673 s390_find_pool (struct constant_pool *pool_list, rtx insn)
7674 {
7675 struct constant_pool *pool;
7676
7677 for (pool = pool_list; pool; pool = pool->next)
7678 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
7679 break;
7680
7681 return pool;
7682 }
7683
7684 /* Add constant VAL of mode MODE to the constant pool POOL. */
7685
7686 static void
7687 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
7688 {
7689 struct constant *c;
7690 int i;
7691
7692 for (i = 0; i < NR_C_MODES; i++)
7693 if (constant_modes[i] == mode)
7694 break;
7695 gcc_assert (i != NR_C_MODES);
7696
7697 for (c = pool->constants[i]; c != NULL; c = c->next)
7698 if (rtx_equal_p (val, c->value))
7699 break;
7700
7701 if (c == NULL)
7702 {
7703 c = (struct constant *) xmalloc (sizeof *c);
7704 c->value = val;
7705 c->label = gen_label_rtx ();
7706 c->next = pool->constants[i];
7707 pool->constants[i] = c;
7708 pool->size += GET_MODE_SIZE (mode);
7709 }
7710 }
7711
7712 /* Return an rtx that represents the offset of X from the start of
7713 pool POOL. */
7714
7715 static rtx
7716 s390_pool_offset (struct constant_pool *pool, rtx x)
7717 {
7718 rtx label;
7719
7720 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
7721 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
7722 UNSPEC_POOL_OFFSET);
7723 return gen_rtx_CONST (GET_MODE (x), x);
7724 }
7725
7726 /* Find constant VAL of mode MODE in the constant pool POOL.
7727 Return an RTX describing the distance from the start of
7728 the pool to the location of the new constant. */
7729
7730 static rtx
7731 s390_find_constant (struct constant_pool *pool, rtx val,
7732 machine_mode mode)
7733 {
7734 struct constant *c;
7735 int i;
7736
7737 for (i = 0; i < NR_C_MODES; i++)
7738 if (constant_modes[i] == mode)
7739 break;
7740 gcc_assert (i != NR_C_MODES);
7741
7742 for (c = pool->constants[i]; c != NULL; c = c->next)
7743 if (rtx_equal_p (val, c->value))
7744 break;
7745
7746 gcc_assert (c);
7747
7748 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
7749 }
7750
7751 /* Check whether INSN is an execute. Return the label_ref to its
7752 execute target template if so, NULL_RTX otherwise. */
7753
7754 static rtx
7755 s390_execute_label (rtx insn)
7756 {
7757 if (NONJUMP_INSN_P (insn)
7758 && GET_CODE (PATTERN (insn)) == PARALLEL
7759 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
7760 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
7761 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
7762
7763 return NULL_RTX;
7764 }
7765
7766 /* Add execute target for INSN to the constant pool POOL. */
7767
7768 static void
7769 s390_add_execute (struct constant_pool *pool, rtx insn)
7770 {
7771 struct constant *c;
7772
7773 for (c = pool->execute; c != NULL; c = c->next)
7774 if (INSN_UID (insn) == INSN_UID (c->value))
7775 break;
7776
7777 if (c == NULL)
7778 {
7779 c = (struct constant *) xmalloc (sizeof *c);
7780 c->value = insn;
7781 c->label = gen_label_rtx ();
7782 c->next = pool->execute;
7783 pool->execute = c;
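/* The execute target template is a single insn and therefore needs
   at most 6 bytes in the pool.  */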
7784 pool->size += 6;
7785 }
7786 }
7787
7788 /* Find execute target for INSN in the constant pool POOL.
7789 Return an RTX describing the distance from the start of
7790 the pool to the location of the execute target. */
7791
7792 static rtx
7793 s390_find_execute (struct constant_pool *pool, rtx insn)
7794 {
7795 struct constant *c;
7796
7797 for (c = pool->execute; c != NULL; c = c->next)
7798 if (INSN_UID (insn) == INSN_UID (c->value))
7799 break;
7800
7801 gcc_assert (c);
7802
7803 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
7804 }
7805
7806 /* For an execute INSN, extract the execute target template. */
7807
7808 static rtx
7809 s390_execute_target (rtx insn)
7810 {
7811 rtx pattern = PATTERN (insn);
7812 gcc_assert (s390_execute_label (insn));
7813
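/* Element 0 of the PARALLEL is the UNSPEC_EXECUTE itself (see
   s390_execute_label); the remaining elements form the target
   template.  */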
7814 if (XVECLEN (pattern, 0) == 2)
7815 {
7816 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
7817 }
7818 else
7819 {
7820 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
7821 int i;
7822
7823 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
7824 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
7825
7826 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
7827 }
7828
7829 return pattern;
7830 }
7831
7832 /* Indicate that INSN cannot be duplicated. This is the case for
7833 execute insns that carry a unique label. */
7834
7835 static bool
7836 s390_cannot_copy_insn_p (rtx_insn *insn)
7837 {
7838 rtx label = s390_execute_label (insn);
7839 return label && label != const0_rtx;
7840 }
7841
7842 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
7843 do not emit the pool base label. */
7844
7845 static void
7846 s390_dump_pool (struct constant_pool *pool, bool remote_label)
7847 {
7848 struct constant *c;
7849 rtx_insn *insn = pool->pool_insn;
7850 int i;
7851
7852 /* Switch to rodata section. */
7853 if (TARGET_CPU_ZARCH)
7854 {
7855 insn = emit_insn_after (gen_pool_section_start (), insn);
7856 INSN_ADDRESSES_NEW (insn, -1);
7857 }
7858
7859 /* Ensure minimum pool alignment. */
7860 if (TARGET_CPU_ZARCH)
7861 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
7862 else
7863 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
7864 INSN_ADDRESSES_NEW (insn, -1);
7865
7866 /* Emit pool base label. */
7867 if (!remote_label)
7868 {
7869 insn = emit_label_after (pool->label, insn);
7870 INSN_ADDRESSES_NEW (insn, -1);
7871 }
7872
7873 /* Dump constants in descending alignment requirement order,
7874 ensuring proper alignment for every constant. */
7875 for (i = 0; i < NR_C_MODES; i++)
7876 for (c = pool->constants[i]; c; c = c->next)
7877 {
7878 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
7879 rtx value = copy_rtx (c->value);
7880 if (GET_CODE (value) == CONST
7881 && GET_CODE (XEXP (value, 0)) == UNSPEC
7882 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
7883 && XVECLEN (XEXP (value, 0), 0) == 1)
7884 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
7885
7886 insn = emit_label_after (c->label, insn);
7887 INSN_ADDRESSES_NEW (insn, -1);
7888
7889 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
7890 gen_rtvec (1, value),
7891 UNSPECV_POOL_ENTRY);
7892 insn = emit_insn_after (value, insn);
7893 INSN_ADDRESSES_NEW (insn, -1);
7894 }
7895
7896 /* Ensure minimum alignment for instructions. */
7897 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
7898 INSN_ADDRESSES_NEW (insn, -1);
7899
7900 /* Output in-pool execute template insns. */
7901 for (c = pool->execute; c; c = c->next)
7902 {
7903 insn = emit_label_after (c->label, insn);
7904 INSN_ADDRESSES_NEW (insn, -1);
7905
7906 insn = emit_insn_after (s390_execute_target (c->value), insn);
7907 INSN_ADDRESSES_NEW (insn, -1);
7908 }
7909
7910 /* Switch back to previous section. */
7911 if (TARGET_CPU_ZARCH)
7912 {
7913 insn = emit_insn_after (gen_pool_section_end (), insn);
7914 INSN_ADDRESSES_NEW (insn, -1);
7915 }
7916
7917 insn = emit_barrier_after (insn);
7918 INSN_ADDRESSES_NEW (insn, -1);
7919
7920 /* Remove placeholder insn. */
7921 remove_insn (pool->pool_insn);
7922 }
7923
7924 /* Free all memory used by POOL. */
7925
7926 static void
7927 s390_free_pool (struct constant_pool *pool)
7928 {
7929 struct constant *c, *next;
7930 int i;
7931
7932 for (i = 0; i < NR_C_MODES; i++)
7933 for (c = pool->constants[i]; c; c = next)
7934 {
7935 next = c->next;
7936 free (c);
7937 }
7938
7939 for (c = pool->execute; c; c = next)
7940 {
7941 next = c->next;
7942 free (c);
7943 }
7944
7945 BITMAP_FREE (pool->insns);
7946 free (pool);
7947 }
7948
7949
7950 /* Collect main literal pool. Return NULL on overflow. */
7951
7952 static struct constant_pool *
7953 s390_mainpool_start (void)
7954 {
7955 struct constant_pool *pool;
7956 rtx_insn *insn;
7957
7958 pool = s390_alloc_pool ();
7959
7960 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7961 {
7962 if (NONJUMP_INSN_P (insn)
7963 && GET_CODE (PATTERN (insn)) == SET
7964 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
7965 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
7966 {
7967 /* There might be two main_pool instructions if base_reg
7968 is call-clobbered; one for shrink-wrapped code and one
7969 for the rest. We want to keep the first. */
7970 if (pool->pool_insn)
7971 {
7972 insn = PREV_INSN (insn);
7973 delete_insn (NEXT_INSN (insn));
7974 continue;
7975 }
7976 pool->pool_insn = insn;
7977 }
7978
7979 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
7980 {
7981 s390_add_execute (pool, insn);
7982 }
7983 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
7984 {
7985 rtx pool_ref = NULL_RTX;
7986 find_constant_pool_ref (PATTERN (insn), &pool_ref);
7987 if (pool_ref)
7988 {
7989 rtx constant = get_pool_constant (pool_ref);
7990 machine_mode mode = get_pool_mode (pool_ref);
7991 s390_add_constant (pool, constant, mode);
7992 }
7993 }
7994
7995 /* If hot/cold partitioning is enabled we have to make sure that
7996 the literal pool is emitted in the same section where the
7997 initialization of the literal pool base pointer takes place.
7998 emit_pool_after is only used in the non-overflow case on non-Z
7999 CPUs, where we can emit the literal pool at the end of the
8000 function body within the text section. */
8001 if (NOTE_P (insn)
8002 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8003 && !pool->emit_pool_after)
8004 pool->emit_pool_after = PREV_INSN (insn);
8005 }
8006
8007 gcc_assert (pool->pool_insn || pool->size == 0);
8008
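/* Pool entries are addressed as base register plus a 12-bit
   displacement, which reaches at most 4096 bytes; a larger pool
   has to be split into chunks instead.  */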
8009 if (pool->size >= 4096)
8010 {
8011 /* We're going to chunkify the pool, so remove the main
8012 pool placeholder insn. */
8013 remove_insn (pool->pool_insn);
8014
8015 s390_free_pool (pool);
8016 pool = NULL;
8017 }
8018
8019 /* If the function ends with the section where the literal pool
8020 should be emitted, set the marker to its end. */
8021 if (pool && !pool->emit_pool_after)
8022 pool->emit_pool_after = get_last_insn ();
8023
8024 return pool;
8025 }
8026
8027 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8028 Modify the current function to output the pool constants as well as
8029 the pool register setup instruction. */
8030
8031 static void
8032 s390_mainpool_finish (struct constant_pool *pool)
8033 {
8034 rtx base_reg = cfun->machine->base_reg;
8035
8036 /* If the pool is empty, we're done. */
8037 if (pool->size == 0)
8038 {
8039 /* We don't actually need a base register after all. */
8040 cfun->machine->base_reg = NULL_RTX;
8041
8042 if (pool->pool_insn)
8043 remove_insn (pool->pool_insn);
8044 s390_free_pool (pool);
8045 return;
8046 }
8047
8048 /* We need correct insn addresses. */
8049 shorten_branches (get_insns ());
8050
8051 /* On zSeries, we use a LARL to load the pool register. The pool is
8052 located in the .rodata section, so we emit it after the function. */
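/* The resulting code looks roughly like this (a sketch only; the
   actual registers, mnemonics and labels depend on the function
   and target flags):

       larl    %r13,.LPOOL              # load pool base
       ...
       lg      %r1,.LC0-.LPOOL(%r13)    # pool references become base + offset

   with the pool entries themselves placed in .rodata.  */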
8053 if (TARGET_CPU_ZARCH)
8054 {
8055 rtx set = gen_main_base_64 (base_reg, pool->label);
8056 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8057 INSN_ADDRESSES_NEW (insn, -1);
8058 remove_insn (pool->pool_insn);
8059
8060 insn = get_last_insn ();
8061 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8062 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8063
8064 s390_dump_pool (pool, 0);
8065 }
8066
8067 /* On S/390, if the total size of the function's code plus literal pool
8068 does not exceed 4096 bytes, we use BASR to set up a function base
8069 pointer, and emit the literal pool at the end of the function. */
8070 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
8071 + pool->size + 8 /* alignment slop */ < 4096)
8072 {
8073 rtx set = gen_main_base_31_small (base_reg, pool->label);
8074 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8075 INSN_ADDRESSES_NEW (insn, -1);
8076 remove_insn (pool->pool_insn);
8077
8078 insn = emit_label_after (pool->label, insn);
8079 INSN_ADDRESSES_NEW (insn, -1);
8080
8081 /* emit_pool_after will be set by s390_mainpool_start to the
8082 last insn of the section where the literal pool should be
8083 emitted. */
8084 insn = pool->emit_pool_after;
8085
8086 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8087 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8088
8089 s390_dump_pool (pool, 1);
8090 }
8091
8092 /* Otherwise, we emit an inline literal pool and use BASR to branch
8093 over it, setting up the pool register at the same time. */
8094 else
8095 {
8096 rtx_code_label *pool_end = gen_label_rtx ();
8097
8098 rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
8099 rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
8100 JUMP_LABEL (insn) = pool_end;
8101 INSN_ADDRESSES_NEW (insn, -1);
8102 remove_insn (pool->pool_insn);
8103
8104 insn = emit_label_after (pool->label, insn);
8105 INSN_ADDRESSES_NEW (insn, -1);
8106
8107 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8108 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8109
8110 insn = emit_label_after (pool_end, pool->pool_insn);
8111 INSN_ADDRESSES_NEW (insn, -1);
8112
8113 s390_dump_pool (pool, 1);
8114 }
8115
8116
8117 /* Replace all literal pool references. */
8118
8119 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
8120 {
8121 if (INSN_P (insn))
8122 replace_ltrel_base (&PATTERN (insn));
8123
8124 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8125 {
8126 rtx addr, pool_ref = NULL_RTX;
8127 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8128 if (pool_ref)
8129 {
8130 if (s390_execute_label (insn))
8131 addr = s390_find_execute (pool, insn);
8132 else
8133 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
8134 get_pool_mode (pool_ref));
8135
8136 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
8137 INSN_CODE (insn) = -1;
8138 }
8139 }
8140 }
8141
8142
8143 /* Free the pool. */
8144 s390_free_pool (pool);
8145 }
8146
8147 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8148 We have decided we cannot use this pool, so revert all changes
8149 to the current function that were done by s390_mainpool_start. */
8150 static void
8151 s390_mainpool_cancel (struct constant_pool *pool)
8152 {
8153 /* We didn't actually change the instruction stream, so simply
8154 free the pool memory. */
8155 s390_free_pool (pool);
8156 }
8157
8158
8159 /* Chunkify the literal pool. */
8160
8161 #define S390_POOL_CHUNK_MIN 0xc00
8162 #define S390_POOL_CHUNK_MAX 0xe00
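/* Both bounds stay well below the 4096 bytes reachable through a
   12-bit displacement, leaving headroom for the base reload insns
   and alignment padding added later (an informal rationale).  */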
8163
8164 static struct constant_pool *
8165 s390_chunkify_start (void)
8166 {
8167 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
8168 int extra_size = 0;
8169 bitmap far_labels;
8170 rtx pending_ltrel = NULL_RTX;
8171 rtx_insn *insn;
8172
8173 rtx (*gen_reload_base) (rtx, rtx) =
8174 TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
8175
8176
8177 /* We need correct insn addresses. */
8178
8179 shorten_branches (get_insns ());
8180
8181 /* Scan all insns and move literals to pool chunks. */
8182
8183 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8184 {
8185 bool section_switch_p = false;
8186
8187 /* Check for pending LTREL_BASE. */
8188 if (INSN_P (insn))
8189 {
8190 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
8191 if (ltrel_base)
8192 {
8193 gcc_assert (ltrel_base == pending_ltrel);
8194 pending_ltrel = NULL_RTX;
8195 }
8196 }
8197
8198 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8199 {
8200 if (!curr_pool)
8201 curr_pool = s390_start_pool (&pool_list, insn);
8202
8203 s390_add_execute (curr_pool, insn);
8204 s390_add_pool_insn (curr_pool, insn);
8205 }
8206 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8207 {
8208 rtx pool_ref = NULL_RTX;
8209 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8210 if (pool_ref)
8211 {
8212 rtx constant = get_pool_constant (pool_ref);
8213 machine_mode mode = get_pool_mode (pool_ref);
8214
8215 if (!curr_pool)
8216 curr_pool = s390_start_pool (&pool_list, insn);
8217
8218 s390_add_constant (curr_pool, constant, mode);
8219 s390_add_pool_insn (curr_pool, insn);
8220
8221 /* Don't split the pool chunk between a LTREL_OFFSET load
8222 and the corresponding LTREL_BASE. */
8223 if (GET_CODE (constant) == CONST
8224 && GET_CODE (XEXP (constant, 0)) == UNSPEC
8225 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
8226 {
8227 gcc_assert (!pending_ltrel);
8228 pending_ltrel = pool_ref;
8229 }
8230 }
8231 }
8232
8233 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
8234 {
8235 if (curr_pool)
8236 s390_add_pool_insn (curr_pool, insn);
8237 /* An LTREL_BASE must follow within the same basic block. */
8238 gcc_assert (!pending_ltrel);
8239 }
8240
8241 if (NOTE_P (insn))
8242 switch (NOTE_KIND (insn))
8243 {
8244 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
8245 section_switch_p = true;
8246 break;
8247 case NOTE_INSN_VAR_LOCATION:
8248 case NOTE_INSN_CALL_ARG_LOCATION:
8249 continue;
8250 default:
8251 break;
8252 }
8253
8254 if (!curr_pool
8255 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
8256 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
8257 continue;
8258
8259 if (TARGET_CPU_ZARCH)
8260 {
8261 if (curr_pool->size < S390_POOL_CHUNK_MAX)
8262 continue;
8263
8264 s390_end_pool (curr_pool, NULL);
8265 curr_pool = NULL;
8266 }
8267 else
8268 {
8269 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
8270 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
8271 + extra_size;
8272
8273 /* We will later have to insert base register reload insns.
8274 Those will have an effect on code size, which we need to
8275 consider here. This calculation makes rather pessimistic
8276 worst-case assumptions. */
8277 if (LABEL_P (insn))
8278 extra_size += 6;
8279
8280 if (chunk_size < S390_POOL_CHUNK_MIN
8281 && curr_pool->size < S390_POOL_CHUNK_MIN
8282 && !section_switch_p)
8283 continue;
8284
8285 /* Pool chunks can only be inserted after BARRIERs ... */
8286 if (BARRIER_P (insn))
8287 {
8288 s390_end_pool (curr_pool, insn);
8289 curr_pool = NULL;
8290 extra_size = 0;
8291 }
8292
8293 /* ... so if we don't find one in time, create one. */
8294 else if (chunk_size > S390_POOL_CHUNK_MAX
8295 || curr_pool->size > S390_POOL_CHUNK_MAX
8296 || section_switch_p)
8297 {
8298 rtx_insn *label, *jump, *barrier, *next, *prev;
8299
8300 if (!section_switch_p)
8301 {
8302 /* We can insert the barrier only after a 'real' insn. */
8303 if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
8304 continue;
8305 if (get_attr_length (insn) == 0)
8306 continue;
8307 /* Don't separate LTREL_BASE from the corresponding
8308 LTREL_OFFSET load. */
8309 if (pending_ltrel)
8310 continue;
8311 next = insn;
8312 do
8313 {
8314 insn = next;
8315 next = NEXT_INSN (insn);
8316 }
8317 while (next
8318 && NOTE_P (next)
8319 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
8320 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
8321 }
8322 else
8323 {
8324 gcc_assert (!pending_ltrel);
8325
8326 /* The old pool has to end before the section switch
8327 note in order to make it part of the current
8328 section. */
8329 insn = PREV_INSN (insn);
8330 }
8331
8332 label = gen_label_rtx ();
8333 prev = insn;
8334 if (prev && NOTE_P (prev))
8335 prev = prev_nonnote_insn (prev);
8336 if (prev)
8337 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
8338 INSN_LOCATION (prev));
8339 else
8340 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
8341 barrier = emit_barrier_after (jump);
8342 insn = emit_label_after (label, barrier);
8343 JUMP_LABEL (jump) = label;
8344 LABEL_NUSES (label) = 1;
8345
8346 INSN_ADDRESSES_NEW (jump, -1);
8347 INSN_ADDRESSES_NEW (barrier, -1);
8348 INSN_ADDRESSES_NEW (insn, -1);
8349
8350 s390_end_pool (curr_pool, barrier);
8351 curr_pool = NULL;
8352 extra_size = 0;
8353 }
8354 }
8355 }
8356
8357 if (curr_pool)
8358 s390_end_pool (curr_pool, NULL);
8359 gcc_assert (!pending_ltrel);
8360
8361 /* Find all labels that are branched into
8362 from an insn belonging to a different chunk. */
8363
8364 far_labels = BITMAP_ALLOC (NULL);
8365
8366 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8367 {
8368 rtx_jump_table_data *table;
8369
8370 /* Labels marked with LABEL_PRESERVE_P can be the target
8371 of non-local jumps, so we have to mark them.
8372 The same holds for named labels.
8373
8374 Don't do that, however, if it is the label before
8375 a jump table. */
8376
8377 if (LABEL_P (insn)
8378 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
8379 {
8380 rtx_insn *vec_insn = NEXT_INSN (insn);
8381 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
8382 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
8383 }
8384 /* Check potential targets in a table jump (casesi_jump). */
8385 else if (tablejump_p (insn, NULL, &table))
8386 {
8387 rtx vec_pat = PATTERN (table);
8388 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
8389
8390 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
8391 {
8392 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
8393
8394 if (s390_find_pool (pool_list, label)
8395 != s390_find_pool (pool_list, insn))
8396 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
8397 }
8398 }
8399 /* If we have a direct jump (conditional or unconditional),
8400 check all potential targets. */
8401 else if (JUMP_P (insn))
8402 {
8403 rtx pat = PATTERN (insn);
8404
8405 if (GET_CODE (pat) == PARALLEL)
8406 pat = XVECEXP (pat, 0, 0);
8407
8408 if (GET_CODE (pat) == SET)
8409 {
8410 rtx label = JUMP_LABEL (insn);
8411 if (label && !ANY_RETURN_P (label))
8412 {
8413 if (s390_find_pool (pool_list, label)
8414 != s390_find_pool (pool_list, insn))
8415 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
8416 }
8417 }
8418 }
8419 }
8420
8421 /* Insert base register reload insns before every pool. */
8422
8423 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
8424 {
8425 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
8426 curr_pool->label);
8427 rtx_insn *insn = curr_pool->first_insn;
8428 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
8429 }
8430
8431 /* Insert base register reload insns at every far label. */
8432
8433 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8434 if (LABEL_P (insn)
8435 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
8436 {
8437 struct constant_pool *pool = s390_find_pool (pool_list, insn);
8438 if (pool)
8439 {
8440 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
8441 pool->label);
8442 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
8443 }
8444 }
8445
8446
8447 BITMAP_FREE (far_labels);
8448
8449
8450 /* Recompute insn addresses. */
8451
8452 init_insn_lengths ();
8453 shorten_branches (get_insns ());
8454
8455 return pool_list;
8456 }
8457
8458 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
8459 After we have decided to use this list, finish implementing
8460 all changes to the current function as required. */
8461
8462 static void
8463 s390_chunkify_finish (struct constant_pool *pool_list)
8464 {
8465 struct constant_pool *curr_pool = NULL;
8466 rtx_insn *insn;
8467
8468
8469 /* Replace all literal pool references. */
8470
8471 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8472 {
8473 if (INSN_P (insn))
8474 replace_ltrel_base (&PATTERN (insn));
8475
8476 curr_pool = s390_find_pool (pool_list, insn);
8477 if (!curr_pool)
8478 continue;
8479
8480 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8481 {
8482 rtx addr, pool_ref = NULL_RTX;
8483 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8484 if (pool_ref)
8485 {
8486 if (s390_execute_label (insn))
8487 addr = s390_find_execute (curr_pool, insn);
8488 else
8489 addr = s390_find_constant (curr_pool,
8490 get_pool_constant (pool_ref),
8491 get_pool_mode (pool_ref));
8492
8493 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
8494 INSN_CODE (insn) = -1;
8495 }
8496 }
8497 }
8498
8499 /* Dump out all literal pools. */
8500
8501 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
8502 s390_dump_pool (curr_pool, 0);
8503
8504 /* Free pool list. */
8505
8506 while (pool_list)
8507 {
8508 struct constant_pool *next = pool_list->next;
8509 s390_free_pool (pool_list);
8510 pool_list = next;
8511 }
8512 }
8513
8514 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
8515 We have decided we cannot use this list, so revert all changes
8516 to the current function that were done by s390_chunkify_start. */
8517
8518 static void
8519 s390_chunkify_cancel (struct constant_pool *pool_list)
8520 {
8521 struct constant_pool *curr_pool = NULL;
8522 rtx_insn *insn;
8523
8524 /* Remove all pool placeholder insns. */
8525
8526 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
8527 {
8528 /* Did we insert an extra barrier? Remove it. */
8529 rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
8530 rtx_insn *jump = barrier? PREV_INSN (barrier) : NULL;
8531 rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
8532
8533 if (jump && JUMP_P (jump)
8534 && barrier && BARRIER_P (barrier)
8535 && label && LABEL_P (label)
8536 && GET_CODE (PATTERN (jump)) == SET
8537 && SET_DEST (PATTERN (jump)) == pc_rtx
8538 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
8539 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
8540 {
8541 remove_insn (jump);
8542 remove_insn (barrier);
8543 remove_insn (label);
8544 }
8545
8546 remove_insn (curr_pool->pool_insn);
8547 }
8548
8549 /* Remove all base register reload insns. */
8550
8551 for (insn = get_insns (); insn; )
8552 {
8553 rtx_insn *next_insn = NEXT_INSN (insn);
8554
8555 if (NONJUMP_INSN_P (insn)
8556 && GET_CODE (PATTERN (insn)) == SET
8557 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
8558 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
8559 remove_insn (insn);
8560
8561 insn = next_insn;
8562 }
8563
8564 /* Free pool list. */
8565
8566 while (pool_list)
8567 {
8568 struct constant_pool *next = pool_list->next;
8569 s390_free_pool (pool_list);
8570 pool_list = next;
8571 }
8572 }
8573
8574 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
8575
8576 void
8577 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
8578 {
8579 REAL_VALUE_TYPE r;
8580
8581 switch (GET_MODE_CLASS (mode))
8582 {
8583 case MODE_FLOAT:
8584 case MODE_DECIMAL_FLOAT:
8585 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
8586
8587 REAL_VALUE_FROM_CONST_DOUBLE (r, exp);
8588 assemble_real (r, mode, align);
8589 break;
8590
8591 case MODE_INT:
8592 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
8593 mark_symbol_refs_as_used (exp);
8594 break;
8595
8596 case MODE_VECTOR_INT:
8597 case MODE_VECTOR_FLOAT:
8598 {
8599 int i;
8600 machine_mode inner_mode;
8601 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
8602
8603 inner_mode = GET_MODE_INNER (GET_MODE (exp));
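/* Only the first element receives the alignment requested for the
   whole vector; the remaining elements follow with their natural
   mode alignment.  */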
8604 for (i = 0; i < XVECLEN (exp, 0); i++)
8605 s390_output_pool_entry (XVECEXP (exp, 0, i),
8606 inner_mode,
8607 i == 0
8608 ? align
8609 : GET_MODE_BITSIZE (inner_mode));
8610 }
8611 break;
8612
8613 default:
8614 gcc_unreachable ();
8615 }
8616 }
8617
8618
8619 /* Return an RTL expression representing the value of the return address
8620 for the frame COUNT steps up from the current frame. FRAME is the
8621 frame pointer of that frame. */
8622
8623 rtx
8624 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
8625 {
8626 int offset;
8627 rtx addr;
8628
8629 /* Without backchain, we fail for all but the current frame. */
8630
8631 if (!TARGET_BACKCHAIN && count > 0)
8632 return NULL_RTX;
8633
8634 /* For the current frame, we need to make sure the initial
8635 value of RETURN_REGNUM is actually saved. */
8636
8637 if (count == 0)
8638 {
8639 /* On non-z architectures branch splitting could overwrite r14. */
8640 if (TARGET_CPU_ZARCH)
8641 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
8642 else
8643 {
8644 cfun_frame_layout.save_return_addr_p = true;
8645 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
8646 }
8647 }
8648
8649 if (TARGET_PACKED_STACK)
8650 offset = -2 * UNITS_PER_LONG;
8651 else
8652 offset = RETURN_REGNUM * UNITS_PER_LONG;
8653
8654 addr = plus_constant (Pmode, frame, offset);
8655 addr = memory_address (Pmode, addr);
8656 return gen_rtx_MEM (Pmode, addr);
8657 }
8658
8659 /* Return an RTL expression representing the back chain stored in
8660 the current stack frame. */
8661
8662 rtx
8663 s390_back_chain_rtx (void)
8664 {
8665 rtx chain;
8666
8667 gcc_assert (TARGET_BACKCHAIN);
8668
8669 if (TARGET_PACKED_STACK)
8670 chain = plus_constant (Pmode, stack_pointer_rtx,
8671 STACK_POINTER_OFFSET - UNITS_PER_LONG);
8672 else
8673 chain = stack_pointer_rtx;
8674
8675 chain = gen_rtx_MEM (Pmode, chain);
8676 return chain;
8677 }
8678
8679 /* Find the first call-clobbered register unused in a function.
8680 This could be used as a base register in a leaf function
8681 or for holding the return address before the epilogue. */
8682
8683 static int
8684 find_unused_clobbered_reg (void)
8685 {
8686 int i;
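/* GPRs 0-5 are call-clobbered; return the first of them not used
   in this function.  */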
8687 for (i = 0; i < 6; i++)
8688 if (!df_regs_ever_live_p (i))
8689 return i;
8690 return 0;
8691 }
8692
8693
8694 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
8695 clobbered hard regs in SETREG. */
8696
8697 static void
8698 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
8699 {
8700 char *regs_ever_clobbered = (char *)data;
8701 unsigned int i, regno;
8702 machine_mode mode = GET_MODE (setreg);
8703
8704 if (GET_CODE (setreg) == SUBREG)
8705 {
8706 rtx inner = SUBREG_REG (setreg);
8707 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
8708 return;
8709 regno = subreg_regno (setreg);
8710 }
8711 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
8712 regno = REGNO (setreg);
8713 else
8714 return;
8715
8716 for (i = regno;
8717 i < regno + HARD_REGNO_NREGS (regno, mode);
8718 i++)
8719 regs_ever_clobbered[i] = 1;
8720 }
8721
8722 /* Walks through all basic blocks of the current function looking
8723 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
8724 of the passed char array REGS_EVER_CLOBBERED are set to one for
8725 each of those regs. */
8726
8727 static void
8728 s390_regs_ever_clobbered (char regs_ever_clobbered[])
8729 {
8730 basic_block cur_bb;
8731 rtx_insn *cur_insn;
8732 unsigned int i;
8733
8734 memset (regs_ever_clobbered, 0, 32);
8735
8736 /* For non-leaf functions we have to consider all call clobbered regs to be
8737 clobbered. */
8738 if (!crtl->is_leaf)
8739 {
8740 for (i = 0; i < 32; i++)
8741 regs_ever_clobbered[i] = call_really_used_regs[i];
8742 }
8743
8744 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
8745 this work is done by liveness analysis (mark_regs_live_at_end).
8746 Special care is needed for functions containing landing pads. Landing pads
8747 may use the eh registers, but the code which sets these registers is not
8748 contained in that function. Hence s390_regs_ever_clobbered is not able to
8749 deal with this automatically. */
8750 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
8751 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
8752 if (crtl->calls_eh_return
8753 || (cfun->machine->has_landing_pad_p
8754 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
8755 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
8756
8757 /* For nonlocal gotos all call-saved registers have to be saved.
8758 This flag is also set for the unwinding code in libgcc.
8759 See expand_builtin_unwind_init. For regs_ever_live this is done by
8760 reload. */
8761 if (crtl->saves_all_registers)
8762 for (i = 0; i < 32; i++)
8763 if (!call_really_used_regs[i])
8764 regs_ever_clobbered[i] = 1;
8765
8766 FOR_EACH_BB_FN (cur_bb, cfun)
8767 {
8768 FOR_BB_INSNS (cur_bb, cur_insn)
8769 {
8770 rtx pat;
8771
8772 if (!INSN_P (cur_insn))
8773 continue;
8774
8775 pat = PATTERN (cur_insn);
8776
8777 /* Ignore GPR restore insns. */
8778 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
8779 {
8780 if (GET_CODE (pat) == SET
8781 && GENERAL_REG_P (SET_DEST (pat)))
8782 {
8783 /* lgdr */
8784 if (GET_MODE (SET_SRC (pat)) == DImode
8785 && FP_REG_P (SET_SRC (pat)))
8786 continue;
8787
8788 /* l / lg */
8789 if (GET_CODE (SET_SRC (pat)) == MEM)
8790 continue;
8791 }
8792
8793 /* lm / lmg */
8794 if (GET_CODE (pat) == PARALLEL
8795 && load_multiple_operation (pat, VOIDmode))
8796 continue;
8797 }
8798
8799 note_stores (pat,
8800 s390_reg_clobbered_rtx,
8801 regs_ever_clobbered);
8802 }
8803 }
8804 }
8805
8806 /* Determine the frame area which actually has to be accessed
8807 in the function epilogue. The values are stored at the
8808 given pointers AREA_BOTTOM (address of the lowest used stack
8809 address) and AREA_TOP (address of the first item which does
8810 not belong to the stack frame). */
8811
8812 static void
8813 s390_frame_area (int *area_bottom, int *area_top)
8814 {
8815 int b, t;
8816
8817 b = INT_MAX;
8818 t = INT_MIN;
8819
8820 if (cfun_frame_layout.first_restore_gpr != -1)
8821 {
8822 b = (cfun_frame_layout.gprs_offset
8823 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
8824 t = b + (cfun_frame_layout.last_restore_gpr
8825 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
8826 }
8827
8828 if (TARGET_64BIT && cfun_save_high_fprs_p)
8829 {
8830 b = MIN (b, cfun_frame_layout.f8_offset);
8831 t = MAX (t, (cfun_frame_layout.f8_offset
8832 + cfun_frame_layout.high_fprs * 8));
8833 }
8834
8835 if (!TARGET_64BIT)
8836 {
8837 if (cfun_fpr_save_p (FPR4_REGNUM))
8838 {
8839 b = MIN (b, cfun_frame_layout.f4_offset);
8840 t = MAX (t, cfun_frame_layout.f4_offset + 8);
8841 }
8842 if (cfun_fpr_save_p (FPR6_REGNUM))
8843 {
8844 b = MIN (b, cfun_frame_layout.f4_offset + 8);
8845 t = MAX (t, cfun_frame_layout.f4_offset + 16);
8846 }
8847 }
8848 *area_bottom = b;
8849 *area_top = t;
8850 }
8851 /* Update gpr_save_slots in the frame layout trying to make use of
8852 FPRs as GPR save slots.
8853 This is a helper routine of s390_register_info. */
8854
8855 static void
8856 s390_register_info_gprtofpr ()
8857 {
8858 int save_reg_slot = FPR0_REGNUM;
8859 int i, j;
8860
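/* Saving GPRs into FPRs requires the lgdr/ldgr instructions
   (TARGET_Z10) and only pays off in leaf functions, where it can
   remove the stm/lm of the save area entirely.  */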
8861 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
8862 return;
8863
8864 for (i = 15; i >= 6; i--)
8865 {
8866 if (cfun_gpr_save_slot (i) == 0)
8867 continue;
8868
8869 /* Advance to the next FP register which can be used as a
8870 GPR save slot. */
8871 while ((!call_really_used_regs[save_reg_slot]
8872 || df_regs_ever_live_p (save_reg_slot)
8873 || cfun_fpr_save_p (save_reg_slot))
8874 && FP_REGNO_P (save_reg_slot))
8875 save_reg_slot++;
8876 if (!FP_REGNO_P (save_reg_slot))
8877 {
8878 /* We only want to use ldgr/lgdr if we can get rid of
8879 stm/lm entirely. So undo the gpr slot allocation in
8880 case we ran out of FPR save slots. */
8881 for (j = 6; j <= 15; j++)
8882 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
8883 cfun_gpr_save_slot (j) = -1;
8884 break;
8885 }
8886 cfun_gpr_save_slot (i) = save_reg_slot++;
8887 }
8888 }
8889
8890 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
8891 stdarg.
8892 This is a helper routine for s390_register_info. */
8893
8894 static void
8895 s390_register_info_stdarg_fpr ()
8896 {
8897 int i;
8898 int min_fpr;
8899 int max_fpr;
8900
8901 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
8902 f0, f2, f4 and f6 for 64 bit. */
8903 if (!cfun->stdarg
8904 || !TARGET_HARD_FLOAT
8905 || !cfun->va_list_fpr_size
8906 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
8907 return;
8908
8909 min_fpr = crtl->args.info.fprs;
8910 max_fpr = min_fpr + cfun->va_list_fpr_size;
8911 if (max_fpr > FP_ARG_NUM_REG)
8912 max_fpr = FP_ARG_NUM_REG;
8913
8914 for (i = min_fpr; i < max_fpr; i++)
8915 cfun_set_fpr_save (i + FPR0_REGNUM);
8916 }
8917
8918 /* Reserve the GPR save slots for GPRs which need to be saved due to
8919 stdarg.
8920 This is a helper routine for s390_register_info. */
8921
8922 static void
8923 s390_register_info_stdarg_gpr ()
8924 {
8925 int i;
8926 int min_gpr;
8927 int max_gpr;
8928
8929 if (!cfun->stdarg
8930 || !cfun->va_list_gpr_size
8931 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
8932 return;
8933
8934 min_gpr = crtl->args.info.gprs;
8935 max_gpr = min_gpr + cfun->va_list_gpr_size;
8936 if (max_gpr > GP_ARG_NUM_REG)
8937 max_gpr = GP_ARG_NUM_REG;
8938
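/* The GPR argument registers start at r2; force a stack save
   slot (-1) for each vararg register so va_arg finds it there.  */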
8939 for (i = min_gpr; i < max_gpr; i++)
8940 cfun_gpr_save_slot (2 + i) = -1;
8941 }
8942
8943 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
8944 for registers which need to be saved in function prologue.
8945 This function can be used until the insns emitted for save/restore
8946 of the regs are visible in the RTL stream. */
8947
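/* GPR save slot encoding: 0 means no save needed, -1 requests a
   slot in the stack save area, and an FPR register number means
   the GPR is saved into that FPR (see s390_register_info_gprtofpr).  */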
8948 static void
8949 s390_register_info ()
8950 {
8951 int i, j;
8952 char clobbered_regs[32];
8953
8954 gcc_assert (!epilogue_completed);
8955
8956 if (reload_completed)
8957 /* After reload we rely on our own routine to determine which
8958 registers need saving. */
8959 s390_regs_ever_clobbered (clobbered_regs);
8960 else
8961 /* During reload we use regs_ever_live as a base since reload
8962 does changes in there which we otherwise would not be aware
8963 of. */
8964 for (i = 0; i < 32; i++)
8965 clobbered_regs[i] = df_regs_ever_live_p (i);
8966
8967 for (i = 0; i < 32; i++)
8968 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
8969
8970 /* Mark the call-saved FPRs which need to be saved.
8971 This needs to be done before checking the special GPRs since the
8972 stack pointer usage depends on whether high FPRs have to be saved
8973 or not. */
8974 cfun_frame_layout.fpr_bitmap = 0;
8975 cfun_frame_layout.high_fprs = 0;
8976 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
8977 if (clobbered_regs[i] && !call_really_used_regs[i])
8978 {
8979 cfun_set_fpr_save (i);
8980 if (i >= FPR8_REGNUM)
8981 cfun_frame_layout.high_fprs++;
8982 }
8983
8984 if (flag_pic)
8985 clobbered_regs[PIC_OFFSET_TABLE_REGNUM]
8986 |= !!df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
8987
8988 clobbered_regs[BASE_REGNUM]
8989 |= (cfun->machine->base_reg
8990 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
8991
8992 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
8993 |= !!frame_pointer_needed;
8994
8995 /* On pre-z900 machines whether r14 needs to be saved might not
8996 be known until machine-dependent reorg (branch splitting).
8997 save_return_addr_p will only be set on non-zarch machines, so
8998 there is no risk that r14 goes into an FPR instead of a stack
8999 slot. */
9000 clobbered_regs[RETURN_REGNUM]
9001 |= (!crtl->is_leaf
9002 || TARGET_TPF_PROFILING
9003 || cfun->machine->split_branches_pending_p
9004 || cfun_frame_layout.save_return_addr_p
9005 || crtl->calls_eh_return);
9006
9007 clobbered_regs[STACK_POINTER_REGNUM]
9008 |= (!crtl->is_leaf
9009 || TARGET_TPF_PROFILING
9010 || cfun_save_high_fprs_p
9011 || get_frame_size () > 0
9012 || (reload_completed && cfun_frame_layout.frame_size > 0)
9013 || cfun->calls_alloca);
9014
9015 memset (cfun_frame_layout.gpr_save_slots, 0, 16);
9016
9017 for (i = 6; i < 16; i++)
9018 if (clobbered_regs[i])
9019 cfun_gpr_save_slot (i) = -1;
9020
9021 s390_register_info_stdarg_fpr ();
9022 s390_register_info_gprtofpr ();
9023
9024 /* First find the range of GPRs to be restored. Vararg regs don't
9025 need to be restored so we do it before assigning slots to the
9026 vararg GPRs. */
9027 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
9028 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
9029 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9030 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9031
9032 /* stdarg functions might need to save GPRs 2 to 6. This might
9033 override the GPR->FPR save decision made above for r6 since
9034 vararg regs must go to the stack. */
9035 s390_register_info_stdarg_gpr ();
9036
9037 /* Now the range of GPRs which need saving. */
9038 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
9039 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
9040 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9041 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9042 }
9043
9044 /* This function is called by s390_optimize_prologue in order to get
9045 rid of unnecessary GPR save/restore instructions. The register info
9046 for the GPRs is re-computed and the ranges are re-calculated. */
9047
9048 static void
9049 s390_optimize_register_info ()
9050 {
9051 char clobbered_regs[32];
9052 int i, j;
9053
9054 gcc_assert (epilogue_completed);
9055 gcc_assert (!cfun->machine->split_branches_pending_p);
9056
9057 s390_regs_ever_clobbered (clobbered_regs);
9058
9059 for (i = 0; i < 32; i++)
9060 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9061
9062 /* There is still special treatment needed for cases invisible to
9063 s390_regs_ever_clobbered. */
9064 clobbered_regs[RETURN_REGNUM]
9065 |= (TARGET_TPF_PROFILING
9066 /* When expanding builtin_return_addr in ESA mode we do not
9067 know whether r14 will later be needed as scratch reg when
9068 doing branch splitting. So the builtin always accesses the
9069 r14 save slot and we need to stick to the save/restore
9070 decision for r14 even if it turns out that it didn't get
9071 clobbered. */
9072 || cfun_frame_layout.save_return_addr_p
9073 || crtl->calls_eh_return);
9074
9075 memset (cfun_frame_layout.gpr_save_slots, 0, 6);
9076
9077 for (i = 6; i < 16; i++)
9078 if (!clobbered_regs[i])
9079 cfun_gpr_save_slot (i) = 0;
9080
9081 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
9082 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
9083 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9084 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9085
9086 s390_register_info_stdarg_gpr ();
9087
9088 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
9089 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
9090 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9091 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9092 }
9093
9094 /* Fill cfun->machine with info about frame of current function. */
9095
9096 static void
9097 s390_frame_info (void)
9098 {
9099 HOST_WIDE_INT lowest_offset;
9100
9101 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
9102 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
9103
9104 /* The va_arg builtin uses a constant distance of 16 *
9105 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
9106 pointer. So even if we are going to save the stack pointer in an
9107 FPR we need the stack space in order to keep the offsets
9108 correct. */
9109 if (cfun->stdarg && cfun_save_arg_fprs_p)
9110 {
9111 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9112
9113 if (cfun_frame_layout.first_save_gpr_slot == -1)
9114 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
9115 }
9116
9117 cfun_frame_layout.frame_size = get_frame_size ();
9118 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
9119 fatal_error (input_location,
9120 "total size of local variables exceeds architecture limit");
9121
9122 if (!TARGET_PACKED_STACK)
9123 {
9124 /* Fixed stack layout. */
9125 cfun_frame_layout.backchain_offset = 0;
9126 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
9127 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
9128 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
9129 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
9130 * UNITS_PER_LONG);
9131 }
9132 else if (TARGET_BACKCHAIN)
9133 {
9134 /* Kernel stack layout - packed stack, backchain, no float */
9135 gcc_assert (TARGET_SOFT_FLOAT);
9136 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
9137 - UNITS_PER_LONG);
9138
9139 /* The distance between the backchain and the return address
9140 save slot must not change. So we always need a slot for the
9141 stack pointer which resides in between. */
9142 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9143
9144 cfun_frame_layout.gprs_offset
9145 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
9146
9147 /* FPRs will not be saved. Nevertheless pick sane values to
9148 keep area calculations valid. */
9149 cfun_frame_layout.f0_offset =
9150 cfun_frame_layout.f4_offset =
9151 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
9152 }
9153 else
9154 {
9155 int num_fprs;
9156
9157 /* Packed stack layout without backchain. */
9158
9159 /* With stdarg FPRs need their dedicated slots. */
9160 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
9161 : (cfun_fpr_save_p (FPR4_REGNUM) +
9162 cfun_fpr_save_p (FPR6_REGNUM)));
9163 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
9164
9165 num_fprs = (cfun->stdarg ? 2
9166 : (cfun_fpr_save_p (FPR0_REGNUM)
9167 + cfun_fpr_save_p (FPR2_REGNUM)));
9168 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
9169
9170 cfun_frame_layout.gprs_offset
9171 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
9172
9173 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
9174 - cfun_frame_layout.high_fprs * 8);
9175 }
9176
9177 if (cfun_save_high_fprs_p)
9178 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
9179
9180 if (!crtl->is_leaf)
9181 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
9182
9183 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
9184 sized area at the bottom of the stack. This is required also for
9185 leaf functions. When GCC generates a local stack reference it
9186 will always add STACK_POINTER_OFFSET to all these references. */
9187 if (crtl->is_leaf
9188 && !TARGET_TPF_PROFILING
9189 && cfun_frame_layout.frame_size == 0
9190 && !cfun->calls_alloca)
9191 return;
9192
9193 /* Calculate the number of bytes we have used in our own register
9194 save area. With the packed stack layout we can re-use the
9195 remaining bytes for normal stack elements. */
9196
9197 if (TARGET_PACKED_STACK)
9198 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
9199 cfun_frame_layout.f4_offset),
9200 cfun_frame_layout.gprs_offset);
9201 else
9202 lowest_offset = 0;
9203
9204 if (TARGET_BACKCHAIN)
9205 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
9206
9207 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
9208
9209 /* If, under 31 bit, an odd number of GPRs has to be saved, we have
9210 to adjust the frame size to maintain 8-byte alignment of stack
9211 frames. */
9212 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
9213 STACK_BOUNDARY / BITS_PER_UNIT - 1)
9214 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
9215 }
9216
9217 /* Generate frame layout. Fills in register and frame data for the current
9218 function in cfun->machine. This routine can be called multiple times;
9219 it will re-do the complete frame layout every time. */
9220
9221 static void
9222 s390_init_frame_layout (void)
9223 {
9224 HOST_WIDE_INT frame_size;
9225 int base_used;
9226
9227 gcc_assert (!reload_completed);
9228
9229 /* On S/390 machines, we may need to perform branch splitting, which
9230 will require both base and return address register. We have no
9231 choice but to assume we're going to need them until right at the
9232 end of the machine dependent reorg phase. */
9233 if (!TARGET_CPU_ZARCH)
9234 cfun->machine->split_branches_pending_p = true;
9235
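/* Iterate until the frame size converges: whether the literal pool
   base register is needed depends on the frame size, and reserving
   it in turn changes the register save area and thus the frame
   size again.  */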
9236 do
9237 {
9238 frame_size = cfun_frame_layout.frame_size;
9239
9240 /* Try to predict whether we'll need the base register. */
9241 base_used = cfun->machine->split_branches_pending_p
9242 || crtl->uses_const_pool
9243 || (!DISP_IN_RANGE (frame_size)
9244 && !CONST_OK_FOR_K (frame_size));
9245
9246 /* Decide which register to use as literal pool base. In small
9247 leaf functions, try to use an unused call-clobbered register
9248 as base register to avoid save/restore overhead. */
9249 if (!base_used)
9250 cfun->machine->base_reg = NULL_RTX;
9251 else if (crtl->is_leaf && !df_regs_ever_live_p (5))
9252 cfun->machine->base_reg = gen_rtx_REG (Pmode, 5);
9253 else
9254 cfun->machine->base_reg = gen_rtx_REG (Pmode, BASE_REGNUM);
9255
9256 s390_register_info ();
9257 s390_frame_info ();
9258 }
9259 while (frame_size != cfun_frame_layout.frame_size);
9260 }
9261
9262 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
9263 the TX is nonescaping. A transaction is considered escaping if
9264 there is at least one path from tbegin returning CC0 to the
9265 function exit block without a tend.
9266
9267 The check so far has some limitations:
9268 - only single tbegin/tend BBs are supported
9269 - the first cond jump after tbegin must separate the CC0 path from ~CC0
9270 - when CC is copied to a GPR and the CC0 check is done with the GPR
9271 this is not supported
9272 */
9273
9274 static void
9275 s390_optimize_nonescaping_tx (void)
9276 {
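/* CCRAWmode compares match the condition code against a four-bit
   mask; the most significant bit (1 << 3) corresponds to CC0.  */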
9277 const unsigned int CC0 = 1 << 3;
9278 basic_block tbegin_bb = NULL;
9279 basic_block tend_bb = NULL;
9280 basic_block bb;
9281 rtx_insn *insn;
9282 bool result = true;
9283 int bb_index;
9284 rtx_insn *tbegin_insn = NULL;
9285
9286 if (!cfun->machine->tbegin_p)
9287 return;
9288
9289 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
9290 {
9291 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
9292
9293 if (!bb)
9294 continue;
9295
9296 FOR_BB_INSNS (bb, insn)
9297 {
9298 rtx ite, cc, pat, target;
9299 unsigned HOST_WIDE_INT mask;
9300
9301 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
9302 continue;
9303
9304 pat = PATTERN (insn);
9305
9306 if (GET_CODE (pat) == PARALLEL)
9307 pat = XVECEXP (pat, 0, 0);
9308
9309 if (GET_CODE (pat) != SET
9310 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
9311 continue;
9312
9313 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
9314 {
9315 rtx_insn *tmp;
9316
9317 tbegin_insn = insn;
9318
9319 /* Just return if the tbegin doesn't have clobbers. */
9320 if (GET_CODE (PATTERN (insn)) != PARALLEL)
9321 return;
9322
9323 if (tbegin_bb != NULL)
9324 return;
9325
9326 /* Find the next conditional jump. */
9327 for (tmp = NEXT_INSN (insn);
9328 tmp != NULL_RTX;
9329 tmp = NEXT_INSN (tmp))
9330 {
9331 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
9332 return;
9333 if (!JUMP_P (tmp))
9334 continue;
9335
9336 ite = SET_SRC (PATTERN (tmp));
9337 if (GET_CODE (ite) != IF_THEN_ELSE)
9338 continue;
9339
9340 cc = XEXP (XEXP (ite, 0), 0);
9341 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
9342 || GET_MODE (cc) != CCRAWmode
9343 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
9344 return;
9345
9346 if (bb->succs->length () != 2)
9347 return;
9348
9349 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
9350 if (GET_CODE (XEXP (ite, 0)) == NE)
9351 mask ^= 0xf;
9352
9353 if (mask == CC0)
9354 target = XEXP (ite, 1);
9355 else if (mask == (CC0 ^ 0xf))
9356 target = XEXP (ite, 2);
9357 else
9358 return;
9359
9360 {
9361 edge_iterator ei;
9362 edge e1, e2;
9363
9364 ei = ei_start (bb->succs);
9365 e1 = ei_safe_edge (ei);
9366 ei_next (&ei);
9367 e2 = ei_safe_edge (ei);
9368
9369 if (e2->flags & EDGE_FALLTHRU)
9370 {
9371 e2 = e1;
9372 e1 = ei_safe_edge (ei);
9373 }
9374
9375 if (!(e1->flags & EDGE_FALLTHRU))
9376 return;
9377
9378 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
9379 }
9380 if (tmp == BB_END (bb))
9381 break;
9382 }
9383 }
9384
9385 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
9386 {
9387 if (tend_bb != NULL)
9388 return;
9389 tend_bb = bb;
9390 }
9391 }
9392 }
9393
9394 /* Either we successfully remove the FPR clobbers here or we are not
9395 able to do anything for this TX. Both cases don't qualify for
9396 another look. */
9397 cfun->machine->tbegin_p = false;
9398
9399 if (tbegin_bb == NULL || tend_bb == NULL)
9400 return;
9401
9402 calculate_dominance_info (CDI_POST_DOMINATORS);
9403 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
9404 free_dominance_info (CDI_POST_DOMINATORS);
9405
9406 if (!result)
9407 return;
9408
9409 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
9410 gen_rtvec (2,
9411 XVECEXP (PATTERN (tbegin_insn), 0, 0),
9412 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
9413 INSN_CODE (tbegin_insn) = -1;
9414 df_insn_rescan (tbegin_insn);
9415
9416 return;
9417 }
9418
9419 /* Return true if it is legal to put a value with MODE into REGNO. */
9420
9421 bool
9422 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
9423 {
9424 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
9425 return false;
9426
9427 switch (REGNO_REG_CLASS (regno))
9428 {
9429 case VEC_REGS:
9430 return ((GET_MODE_CLASS (mode) == MODE_INT
9431 && s390_class_max_nregs (VEC_REGS, mode) == 1)
9432 || mode == DFmode
9433 || s390_vector_mode_supported_p (mode));
9434 break;
9435 case FP_REGS:
9436 if (TARGET_VX
9437 && ((GET_MODE_CLASS (mode) == MODE_INT
9438 && s390_class_max_nregs (FP_REGS, mode) == 1)
9439 || mode == DFmode
9440 || s390_vector_mode_supported_p (mode)))
9441 return true;
9442
9443 if (REGNO_PAIR_OK (regno, mode))
9444 {
9445 if (mode == SImode || mode == DImode)
9446 return true;
9447
9448 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
9449 return true;
9450 }
9451 break;
9452 case ADDR_REGS:
9453 if (FRAME_REGNO_P (regno) && mode == Pmode)
9454 return true;
9455
9456 /* fallthrough */
9457 case GENERAL_REGS:
9458 if (REGNO_PAIR_OK (regno, mode))
9459 {
9460 if (TARGET_ZARCH
9461 || (mode != TFmode && mode != TCmode && mode != TDmode))
9462 return true;
9463 }
9464 break;
9465 case CC_REGS:
9466 if (GET_MODE_CLASS (mode) == MODE_CC)
9467 return true;
9468 break;
9469 case ACCESS_REGS:
9470 if (REGNO_PAIR_OK (regno, mode))
9471 {
9472 if (mode == SImode || mode == Pmode)
9473 return true;
9474 }
9475 break;
9476 default:
9477 return false;
9478 }
9479
9480 return false;
9481 }
9482
9483 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
9484
9485 bool
9486 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
9487 {
9488 /* Once we've decided upon a register to use as base register, it must
9489 no longer be used for any other purpose. */
9490 if (cfun->machine->base_reg)
9491 if (REGNO (cfun->machine->base_reg) == old_reg
9492 || REGNO (cfun->machine->base_reg) == new_reg)
9493 return false;
9494
9495 /* Prevent regrename from using call-saved regs which haven't
9496 actually been saved. This is necessary since regrename assumes
9497 the backend save/restore decisions are based on
9498 df_regs_ever_live. Since we have our own routine we have to tell
9499 regrename manually about it. */
9500 if (GENERAL_REGNO_P (new_reg)
9501 && !call_really_used_regs[new_reg]
9502 && cfun_gpr_save_slot (new_reg) == 0)
9503 return false;
9504
9505 return true;
9506 }
9507
9508 /* Return nonzero if register REGNO can be used as a scratch register
9509 in peephole2. */
9510
9511 static bool
9512 s390_hard_regno_scratch_ok (unsigned int regno)
9513 {
9514 /* See s390_hard_regno_rename_ok. */
9515 if (GENERAL_REGNO_P (regno)
9516 && !call_really_used_regs[regno]
9517 && cfun_gpr_save_slot (regno) == 0)
9518 return false;
9519
9520 return true;
9521 }
9522
9523 /* Maximum number of registers to represent a value of mode MODE
9524 in a register of class RCLASS. */
9525
9526 int
9527 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
9528 {
9529 int reg_size;
9530 bool reg_pair_required_p = false;
9531
9532 switch (rclass)
9533 {
9534 case FP_REGS:
9535 case VEC_REGS:
9536 reg_size = TARGET_VX ? 16 : 8;
9537
9538 /* TF and TD modes would fit into a VR but we put them into a
9539 register pair since we do not have 128bit FP instructions on
9540 full VRs. */
9541 if (TARGET_VX
9542 && SCALAR_FLOAT_MODE_P (mode)
9543 && GET_MODE_SIZE (mode) >= 16)
9544 reg_pair_required_p = true;
9545
9546 /* Even if complex types would fit into a single FPR/VR we force
9547 them into a register pair to deal with the parts more easily.
9548 (FIXME: What about complex ints?) */
9549 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
9550 reg_pair_required_p = true;
9551 break;
9552 case ACCESS_REGS:
9553 reg_size = 4;
9554 break;
9555 default:
9556 reg_size = UNITS_PER_WORD;
9557 break;
9558 }
9559
9560 if (reg_pair_required_p)
9561 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
9562
9563 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
9564 }
9565
9566 /* Return TRUE if changing mode from FROM to TO should not be allowed
9567 for register class CLASS. */
9568
9569 int
9570 s390_cannot_change_mode_class (machine_mode from_mode,
9571 machine_mode to_mode,
9572 enum reg_class rclass)
9573 {
9574 machine_mode small_mode;
9575 machine_mode big_mode;
9576
9577 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
9578 return 0;
9579
9580 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
9581 {
9582 small_mode = from_mode;
9583 big_mode = to_mode;
9584 }
9585 else
9586 {
9587 small_mode = to_mode;
9588 big_mode = from_mode;
9589 }
9590
9591 /* Values residing in VRs are little-endian style. All modes are
9592 placed left-aligned in a VR. This means that we cannot allow
9593 switching between modes with differing sizes. Also if the vector
9594 facility is available we still place TFmode values in VR register
9595 pairs, since the only instructions we have operating on TFmodes
9596 only deal with register pairs. Therefore we have to allow DFmode
9597 subregs of TFmodes to enable the TFmode splitters. */
9598 if (reg_classes_intersect_p (VEC_REGS, rclass)
9599 && (GET_MODE_SIZE (small_mode) < 8
9600 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
9601 return 1;
9602
9603 /* Likewise for access registers, since they have only half the
9604 word size on 64-bit. */
9605 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
9606 return 1;
9607
9608 return 0;
9609 }
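/* Illustrative consequences of the checks above (a sketch, not an
   exhaustive list): for VEC_REGS, a DFmode subreg of a TFmode value is
   allowed because s390_class_max_nregs (VEC_REGS, TFmode) == 2, whereas
   an SImode subreg of a DImode value is rejected because the smaller
   mode is below 8 bytes.  */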
9610
9611 /* Return true if we use LRA instead of reload pass. */
9612 static bool
9613 s390_lra_p (void)
9614 {
9615 return s390_lra_flag;
9616 }
9617
9618 /* Return true if register FROM can be eliminated via register TO. */
9619
9620 static bool
9621 s390_can_eliminate (const int from, const int to)
9622 {
9623 /* On zSeries machines, we have not marked the base register as fixed.
9624 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
9625 If a function requires the base register, we say here that this
9626 elimination cannot be performed. This will cause reload to free
9627 up the base register (as if it were fixed). On the other hand,
9628 if the current function does *not* require the base register, we
9629 say here the elimination succeeds, which in turn allows reload
9630 to allocate the base register for any other purpose. */
9631 if (from == BASE_REGNUM && to == BASE_REGNUM)
9632 {
9633 if (TARGET_CPU_ZARCH)
9634 {
9635 s390_init_frame_layout ();
9636 return cfun->machine->base_reg == NULL_RTX;
9637 }
9638
9639 return false;
9640 }
9641
9642 /* Everything else must point into the stack frame. */
9643 gcc_assert (to == STACK_POINTER_REGNUM
9644 || to == HARD_FRAME_POINTER_REGNUM);
9645
9646 gcc_assert (from == FRAME_POINTER_REGNUM
9647 || from == ARG_POINTER_REGNUM
9648 || from == RETURN_ADDRESS_POINTER_REGNUM);
9649
9650 /* Make sure we actually saved the return address. */
9651 if (from == RETURN_ADDRESS_POINTER_REGNUM)
9652 if (!crtl->calls_eh_return
9653 && !cfun->stdarg
9654 && !cfun_frame_layout.save_return_addr_p)
9655 return false;
9656
9657 return true;
9658 }
9659
9660 /* Return the offset between registers FROM and TO immediately after the prologue. */
9661
9662 HOST_WIDE_INT
9663 s390_initial_elimination_offset (int from, int to)
9664 {
9665 HOST_WIDE_INT offset;
9666
9667 /* ??? Why are we called for non-eliminable pairs? */
9668 if (!s390_can_eliminate (from, to))
9669 return 0;
9670
9671 switch (from)
9672 {
9673 case FRAME_POINTER_REGNUM:
9674 offset = (get_frame_size()
9675 + STACK_POINTER_OFFSET
9676 + crtl->outgoing_args_size);
9677 break;
9678
9679 case ARG_POINTER_REGNUM:
9680 s390_init_frame_layout ();
9681 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
9682 break;
9683
9684 case RETURN_ADDRESS_POINTER_REGNUM:
9685 s390_init_frame_layout ();
9686
9687 if (cfun_frame_layout.first_save_gpr_slot == -1)
9688 {
9689 /* If it turns out that for stdarg nothing went into the reg
9690 save area we also do not need the return address
9691 pointer. */
9692 if (cfun->stdarg && !cfun_save_arg_fprs_p)
9693 return 0;
9694
9695 gcc_unreachable ();
9696 }
9697
9698 /* In order to make the following work it is not necessary for
9699 r14 to have a save slot. It is sufficient if one other GPR
9700 got one. Since the GPRs are always stored without gaps we
9701 are able to calculate where the r14 save slot would
9702 reside. */
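/* For illustration only (made-up numbers, not mandated by the ABI):
   with frame_size == 160, gprs_offset == 48, first_save_gpr_slot == 6,
   RETURN_REGNUM == 14 and UNITS_PER_LONG == 8, the elimination offset
   computed below is 160 + 48 + (14 - 6) * 8 == 272.  */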
9703 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
9704 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
9705 UNITS_PER_LONG);
9706 break;
9707
9708 case BASE_REGNUM:
9709 offset = 0;
9710 break;
9711
9712 default:
9713 gcc_unreachable ();
9714 }
9715
9716 return offset;
9717 }
9718
9719 /* Emit insn to save fpr REGNUM at offset OFFSET relative
9720 to register BASE. Return generated insn. */
9721
9722 static rtx
9723 save_fpr (rtx base, int offset, int regnum)
9724 {
9725 rtx addr;
9726 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
9727
9728 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
9729 set_mem_alias_set (addr, get_varargs_alias_set ());
9730 else
9731 set_mem_alias_set (addr, get_frame_alias_set ());
9732
9733 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
9734 }
9735
9736 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
9737 to register BASE. Return generated insn. */
9738
9739 static rtx
9740 restore_fpr (rtx base, int offset, int regnum)
9741 {
9742 rtx addr;
9743 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
9744 set_mem_alias_set (addr, get_frame_alias_set ());
9745
9746 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
9747 }
9748
9749 /* Return true if REGNO is a global register, but not one
9750 of the special ones that need to be saved/restored in anyway. */
9751
9752 static inline bool
9753 global_not_special_regno_p (int regno)
9754 {
9755 return (global_regs[regno]
9756 /* These registers are special and need to be
9757 restored in any case. */
9758 && !(regno == STACK_POINTER_REGNUM
9759 || regno == RETURN_REGNUM
9760 || regno == BASE_REGNUM
9761 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
9762 }
9763
9764 /* Generate insn to save registers FIRST to LAST into
9765 the register save area located at offset OFFSET
9766 relative to register BASE. */
9767
9768 static rtx
9769 save_gprs (rtx base, int offset, int first, int last)
9770 {
9771 rtx addr, insn, note;
9772 int i;
9773
9774 addr = plus_constant (Pmode, base, offset);
9775 addr = gen_rtx_MEM (Pmode, addr);
9776
9777 set_mem_alias_set (addr, get_frame_alias_set ());
9778
9779 /* Special-case single register. */
9780 if (first == last)
9781 {
9782 if (TARGET_64BIT)
9783 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
9784 else
9785 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
9786
9787 if (!global_not_special_regno_p (first))
9788 RTX_FRAME_RELATED_P (insn) = 1;
9789 return insn;
9790 }
9791
9792
9793 insn = gen_store_multiple (addr,
9794 gen_rtx_REG (Pmode, first),
9795 GEN_INT (last - first + 1));
9796
9797 if (first <= 6 && cfun->stdarg)
9798 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9799 {
9800 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
9801
9802 if (first + i <= 6)
9803 set_mem_alias_set (mem, get_varargs_alias_set ());
9804 }
9805
9806 /* We need to set the FRAME_RELATED flag on all SETs
9807 inside the store-multiple pattern.
9808
9809 However, we must not emit DWARF records for registers 2..5
9810 if they are stored for use by variable arguments ...
9811
9812 ??? Unfortunately, it is not enough to simply not set the
9813 FRAME_RELATED flags for those SETs, because the first SET
9814 of the PARALLEL is always treated as if it had the flag
9815 set, even if it does not. Therefore we emit a new pattern
9816 without those registers as REG_FRAME_RELATED_EXPR note. */
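/* As an illustration of the note emitted below (a sketch, not an
   additional requirement): in a 64-bit stdarg function saving r2..r15
   with a single STMG, the REG_FRAME_RELATED_EXPR attached to the insn
   describes only the stores of r6..r15; the stores of the vararg
   registers r2..r5 produce no DWARF records.  */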
9817
9818 if (first >= 6 && !global_not_special_regno_p (first))
9819 {
9820 rtx pat = PATTERN (insn);
9821
9822 for (i = 0; i < XVECLEN (pat, 0); i++)
9823 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
9824 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
9825 0, i)))))
9826 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
9827
9828 RTX_FRAME_RELATED_P (insn) = 1;
9829 }
9830 else if (last >= 6)
9831 {
9832 int start;
9833
9834 for (start = first >= 6 ? first : 6; start <= last; start++)
9835 if (!global_not_special_regno_p (start))
9836 break;
9837
9838 if (start > last)
9839 return insn;
9840
9841 addr = plus_constant (Pmode, base,
9842 offset + (start - first) * UNITS_PER_LONG);
9843
9844 if (start == last)
9845 {
9846 if (TARGET_64BIT)
9847 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
9848 gen_rtx_REG (Pmode, start));
9849 else
9850 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
9851 gen_rtx_REG (Pmode, start));
9852 note = PATTERN (note);
9853
9854 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
9855 RTX_FRAME_RELATED_P (insn) = 1;
9856
9857 return insn;
9858 }
9859
9860 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
9861 gen_rtx_REG (Pmode, start),
9862 GEN_INT (last - start + 1));
9863 note = PATTERN (note);
9864
9865 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
9866
9867 for (i = 0; i < XVECLEN (note, 0); i++)
9868 if (GET_CODE (XVECEXP (note, 0, i)) == SET
9869 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
9870 0, i)))))
9871 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
9872
9873 RTX_FRAME_RELATED_P (insn) = 1;
9874 }
9875
9876 return insn;
9877 }
9878
9879 /* Generate insn to restore registers FIRST to LAST from
9880 the register save area located at offset OFFSET
9881 relative to register BASE. */
9882
9883 static rtx
9884 restore_gprs (rtx base, int offset, int first, int last)
9885 {
9886 rtx addr, insn;
9887
9888 addr = plus_constant (Pmode, base, offset);
9889 addr = gen_rtx_MEM (Pmode, addr);
9890 set_mem_alias_set (addr, get_frame_alias_set ());
9891
9892 /* Special-case single register. */
9893 if (first == last)
9894 {
9895 if (TARGET_64BIT)
9896 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
9897 else
9898 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
9899
9900 RTX_FRAME_RELATED_P (insn) = 1;
9901 return insn;
9902 }
9903
9904 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
9905 addr,
9906 GEN_INT (last - first + 1));
9907 RTX_FRAME_RELATED_P (insn) = 1;
9908 return insn;
9909 }
9910
9911 /* Return insn sequence to load the GOT register. */
9912
9913 static GTY(()) rtx got_symbol;
9914 rtx_insn *
9915 s390_load_got (void)
9916 {
9917 rtx_insn *insns;
9918
9919 /* We cannot use pic_offset_table_rtx here since we also use this
9920 function for non-PIC code if __tls_get_offset is called, and in
9921 that case neither PIC_OFFSET_TABLE_REGNUM nor pic_offset_table_rtx
9922 is usable. */
9923 rtx got_rtx = gen_rtx_REG (Pmode, 12);
9924
9925 if (!got_symbol)
9926 {
9927 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9928 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
9929 }
9930
9931 start_sequence ();
9932
9933 if (TARGET_CPU_ZARCH)
9934 {
9935 emit_move_insn (got_rtx, got_symbol);
9936 }
9937 else
9938 {
9939 rtx offset;
9940
9941 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
9942 UNSPEC_LTREL_OFFSET);
9943 offset = gen_rtx_CONST (Pmode, offset);
9944 offset = force_const_mem (Pmode, offset);
9945
9946 emit_move_insn (got_rtx, offset);
9947
9948 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
9949 UNSPEC_LTREL_BASE);
9950 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
9951
9952 emit_move_insn (got_rtx, offset);
9953 }
9954
9955 insns = get_insns ();
9956 end_sequence ();
9957 return insns;
9958 }
9959
9960 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
9961 and the change to the stack pointer. */
9962
9963 static void
9964 s390_emit_stack_tie (void)
9965 {
9966 rtx mem = gen_frame_mem (BLKmode,
9967 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
9968
9969 emit_insn (gen_stack_tie (mem));
9970 }
9971
9972 /* Copy GPRS into FPR save slots. */
9973
9974 static void
9975 s390_save_gprs_to_fprs (void)
9976 {
9977 int i;
9978
9979 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9980 return;
9981
9982 for (i = 6; i < 16; i++)
9983 {
9984 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
9985 {
9986 rtx_insn *insn =
9987 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
9988 gen_rtx_REG (DImode, i));
9989 RTX_FRAME_RELATED_P (insn) = 1;
9990 }
9991 }
9992 }
9993
9994 /* Restore GPRs from FPR save slots. */
9995
9996 static void
9997 s390_restore_gprs_from_fprs (void)
9998 {
9999 int i;
10000
10001 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10002 return;
10003
10004 for (i = 6; i < 16; i++)
10005 {
10006 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10007 {
10008 rtx_insn *insn =
10009 emit_move_insn (gen_rtx_REG (DImode, i),
10010 gen_rtx_REG (DImode, cfun_gpr_save_slot (i)));
10011 df_set_regs_ever_live (i, true);
10012 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
10013 if (i == STACK_POINTER_REGNUM)
10014 add_reg_note (insn, REG_CFA_DEF_CFA,
10015 plus_constant (Pmode, stack_pointer_rtx,
10016 STACK_POINTER_OFFSET));
10017 RTX_FRAME_RELATED_P (insn) = 1;
10018 }
10019 }
10020 }
10021
10022
10023 /* A pass run immediately before shrink-wrapping and prologue and epilogue
10024 generation. */
10025
10026 namespace {
10027
10028 const pass_data pass_data_s390_early_mach =
10029 {
10030 RTL_PASS, /* type */
10031 "early_mach", /* name */
10032 OPTGROUP_NONE, /* optinfo_flags */
10033 TV_MACH_DEP, /* tv_id */
10034 0, /* properties_required */
10035 0, /* properties_provided */
10036 0, /* properties_destroyed */
10037 0, /* todo_flags_start */
10038 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
10039 };
10040
10041 class pass_s390_early_mach : public rtl_opt_pass
10042 {
10043 public:
10044 pass_s390_early_mach (gcc::context *ctxt)
10045 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
10046 {}
10047
10048 /* opt_pass methods: */
10049 virtual unsigned int execute (function *);
10050
10051 }; // class pass_s390_early_mach
10052
10053 unsigned int
10054 pass_s390_early_mach::execute (function *fun)
10055 {
10056 rtx_insn *insn;
10057
10058 /* Try to get rid of the FPR clobbers. */
10059 s390_optimize_nonescaping_tx ();
10060
10061 /* Re-compute register info. */
10062 s390_register_info ();
10063
10064 /* If we're using a base register, ensure that it is always valid for
10065 the first non-prologue instruction. */
10066 if (fun->machine->base_reg)
10067 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
10068
10069 /* Annotate all constant pool references to let the scheduler know
10070 they implicitly use the base register. */
10071 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10072 if (INSN_P (insn))
10073 {
10074 annotate_constant_pool_refs (&PATTERN (insn));
10075 df_insn_rescan (insn);
10076 }
10077 return 0;
10078 }
10079
10080 } // anon namespace
10081
10082 /* Expand the prologue into a bunch of separate insns. */
10083
10084 void
10085 s390_emit_prologue (void)
10086 {
10087 rtx insn, addr;
10088 rtx temp_reg;
10089 int i;
10090 int offset;
10091 int next_fpr = 0;
10092
10093 /* Choose the best register for temporary use within the prologue.
10094 See below for why TPF must use register 1. */
10095
10096 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
10097 && !crtl->is_leaf
10098 && !TARGET_TPF_PROFILING)
10099 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
10100 else
10101 temp_reg = gen_rtx_REG (Pmode, 1);
10102
10103 s390_save_gprs_to_fprs ();
10104
10105 /* Save call saved gprs. */
10106 if (cfun_frame_layout.first_save_gpr != -1)
10107 {
10108 insn = save_gprs (stack_pointer_rtx,
10109 cfun_frame_layout.gprs_offset +
10110 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
10111 - cfun_frame_layout.first_save_gpr_slot),
10112 cfun_frame_layout.first_save_gpr,
10113 cfun_frame_layout.last_save_gpr);
10114 emit_insn (insn);
10115 }
10116
10117 /* Dummy insn to mark literal pool slot. */
10118
10119 if (cfun->machine->base_reg)
10120 emit_insn (gen_main_pool (cfun->machine->base_reg));
10121
10122 offset = cfun_frame_layout.f0_offset;
10123
10124 /* Save f0 and f2. */
10125 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
10126 {
10127 if (cfun_fpr_save_p (i))
10128 {
10129 save_fpr (stack_pointer_rtx, offset, i);
10130 offset += 8;
10131 }
10132 else if (!TARGET_PACKED_STACK || cfun->stdarg)
10133 offset += 8;
10134 }
10135
10136 /* Save f4 and f6. */
10137 offset = cfun_frame_layout.f4_offset;
10138 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
10139 {
10140 if (cfun_fpr_save_p (i))
10141 {
10142 insn = save_fpr (stack_pointer_rtx, offset, i);
10143 offset += 8;
10144
10145 /* If f4 and f6 are call clobbered they are saved due to
10146 stdargs and therefore are not frame related. */
10147 if (!call_really_used_regs[i])
10148 RTX_FRAME_RELATED_P (insn) = 1;
10149 }
10150 else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
10151 offset += 8;
10152 }
10153
10154 if (TARGET_PACKED_STACK
10155 && cfun_save_high_fprs_p
10156 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
10157 {
10158 offset = (cfun_frame_layout.f8_offset
10159 + (cfun_frame_layout.high_fprs - 1) * 8);
10160
10161 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
10162 if (cfun_fpr_save_p (i))
10163 {
10164 insn = save_fpr (stack_pointer_rtx, offset, i);
10165
10166 RTX_FRAME_RELATED_P (insn) = 1;
10167 offset -= 8;
10168 }
10169 if (offset >= cfun_frame_layout.f8_offset)
10170 next_fpr = i;
10171 }
10172
10173 if (!TARGET_PACKED_STACK)
10174 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
10175
10176 if (flag_stack_usage_info)
10177 current_function_static_stack_size = cfun_frame_layout.frame_size;
10178
10179 /* Decrement stack pointer. */
10180
10181 if (cfun_frame_layout.frame_size > 0)
10182 {
10183 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
10184 rtx real_frame_off;
10185
10186 if (s390_stack_size)
10187 {
10188 HOST_WIDE_INT stack_guard;
10189
10190 if (s390_stack_guard)
10191 stack_guard = s390_stack_guard;
10192 else
10193 {
10194 /* If no value for stack guard is provided the smallest power of 2
10195 larger than the current frame size is chosen. */
10196 stack_guard = 1;
10197 while (stack_guard < cfun_frame_layout.frame_size)
10198 stack_guard <<= 1;
10199 }
10200
10201 if (cfun_frame_layout.frame_size >= s390_stack_size)
10202 {
10203 warning (0, "frame size of function %qs is %wd"
10204 " bytes exceeding user provided stack limit of "
10205 "%d bytes. "
10206 "An unconditional trap is added.",
10207 current_function_name(), cfun_frame_layout.frame_size,
10208 s390_stack_size);
10209 emit_insn (gen_trap ());
10210 }
10211 else
10212 {
10213 /* stack_guard has to be smaller than s390_stack_size.
10214 Otherwise we would emit an AND with zero which would
10215 not match the test under mask pattern. */
10216 if (stack_guard >= s390_stack_size)
10217 {
10218 warning (0, "frame size of function %qs is %wd"
10219 " bytes which is more than half the stack size. "
10220 "The dynamic check would not be reliable. "
10221 "No check emitted for this function.",
10222 current_function_name(),
10223 cfun_frame_layout.frame_size);
10224 }
10225 else
10226 {
10227 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
10228 & ~(stack_guard - 1));
10229
10230 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
10231 GEN_INT (stack_check_mask));
10232 if (TARGET_64BIT)
10233 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
10234 t, const0_rtx),
10235 t, const0_rtx, const0_rtx));
10236 else
10237 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
10238 t, const0_rtx),
10239 t, const0_rtx, const0_rtx));
10240 }
10241 }
10242 }
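/* Worked example of the runtime check above (illustrative values, not
   defaults): with -mstack-size=65536, no -mstack-guard and a frame size
   of 3000, stack_guard becomes 4096 and stack_check_mask becomes
   (65536 - 1) & ~(4096 - 1) == 0xf000, so the emitted compare-and-trap
   fires when (sp & 0xf000) == 0, i.e. once the stack pointer has dropped
   into the lowest guard-sized slice of the 64k area.  */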
10243
10244 if (s390_warn_framesize > 0
10245 && cfun_frame_layout.frame_size >= s390_warn_framesize)
10246 warning (0, "frame size of %qs is %wd bytes",
10247 current_function_name (), cfun_frame_layout.frame_size);
10248
10249 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
10250 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
10251
10252 /* Save incoming stack pointer into temp reg. */
10253 if (TARGET_BACKCHAIN || next_fpr)
10254 insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
10255
10256 /* Subtract frame size from stack pointer. */
10257
10258 if (DISP_IN_RANGE (INTVAL (frame_off)))
10259 {
10260 insn = gen_rtx_SET (stack_pointer_rtx,
10261 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10262 frame_off));
10263 insn = emit_insn (insn);
10264 }
10265 else
10266 {
10267 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
10268 frame_off = force_const_mem (Pmode, frame_off);
10269
10270 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
10271 annotate_constant_pool_refs (&PATTERN (insn));
10272 }
10273
10274 RTX_FRAME_RELATED_P (insn) = 1;
10275 real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
10276 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10277 gen_rtx_SET (stack_pointer_rtx,
10278 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10279 real_frame_off)));
10280
10281 /* Set backchain. */
10282
10283 if (TARGET_BACKCHAIN)
10284 {
10285 if (cfun_frame_layout.backchain_offset)
10286 addr = gen_rtx_MEM (Pmode,
10287 plus_constant (Pmode, stack_pointer_rtx,
10288 cfun_frame_layout.backchain_offset));
10289 else
10290 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
10291 set_mem_alias_set (addr, get_frame_alias_set ());
10292 insn = emit_insn (gen_move_insn (addr, temp_reg));
10293 }
10294
10295 /* If we support non-call exceptions (e.g. for Java),
10296 we need to make sure the backchain pointer is set up
10297 before any possibly trapping memory access. */
10298 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
10299 {
10300 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
10301 emit_clobber (addr);
10302 }
10303 }
10304
10305 /* Save fprs 8 - 15 (64 bit ABI). */
10306
10307 if (cfun_save_high_fprs_p && next_fpr)
10308 {
10309 /* If the stack might be accessed through a different register
10310 we have to make sure that the stack pointer decrement is not
10311 moved below the use of the stack slots. */
10312 s390_emit_stack_tie ();
10313
10314 insn = emit_insn (gen_add2_insn (temp_reg,
10315 GEN_INT (cfun_frame_layout.f8_offset)));
10316
10317 offset = 0;
10318
10319 for (i = FPR8_REGNUM; i <= next_fpr; i++)
10320 if (cfun_fpr_save_p (i))
10321 {
10322 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
10323 cfun_frame_layout.frame_size
10324 + cfun_frame_layout.f8_offset
10325 + offset);
10326
10327 insn = save_fpr (temp_reg, offset, i);
10328 offset += 8;
10329 RTX_FRAME_RELATED_P (insn) = 1;
10330 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10331 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
10332 gen_rtx_REG (DFmode, i)));
10333 }
10334 }
10335
10336 /* Set frame pointer, if needed. */
10337
10338 if (frame_pointer_needed)
10339 {
10340 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10341 RTX_FRAME_RELATED_P (insn) = 1;
10342 }
10343
10344 /* Set up got pointer, if needed. */
10345
10346 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
10347 {
10348 rtx_insn *insns = s390_load_got ();
10349
10350 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
10351 annotate_constant_pool_refs (&PATTERN (insn));
10352
10353 emit_insn (insns);
10354 }
10355
10356 if (TARGET_TPF_PROFILING)
10357 {
10358 /* Generate a BAS instruction to serve as a function
10359 entry intercept to facilitate the use of tracing
10360 algorithms located at the branch target. */
10361 emit_insn (gen_prologue_tpf ());
10362
10363 /* Emit a blockage here so that all code
10364 lies between the profiling mechanisms. */
10365 emit_insn (gen_blockage ());
10366 }
10367 }
10368
10369 /* Expand the epilogue into a bunch of separate insns. */
10370
10371 void
10372 s390_emit_epilogue (bool sibcall)
10373 {
10374 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
10375 int area_bottom, area_top, offset = 0;
10376 int next_offset;
10377 rtvec p;
10378 int i;
10379
10380 if (TARGET_TPF_PROFILING)
10381 {
10382
10383 /* Generate a BAS instruction to serve as a function
10384 entry intercept to facilitate the use of tracing
10385 algorithms located at the branch target. */
10386
10387 /* Emit a blockage here so that all code
10388 lies between the profiling mechanisms. */
10389 emit_insn (gen_blockage ());
10390
10391 emit_insn (gen_epilogue_tpf ());
10392 }
10393
10394 /* Check whether to use frame or stack pointer for restore. */
10395
10396 frame_pointer = (frame_pointer_needed
10397 ? hard_frame_pointer_rtx : stack_pointer_rtx);
10398
10399 s390_frame_area (&area_bottom, &area_top);
10400
10401 /* Check whether we can access the register save area.
10402 If not, increment the frame pointer as required. */
10403
10404 if (area_top <= area_bottom)
10405 {
10406 /* Nothing to restore. */
10407 }
10408 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
10409 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
10410 {
10411 /* Area is in range. */
10412 offset = cfun_frame_layout.frame_size;
10413 }
10414 else
10415 {
10416 rtx insn, frame_off, cfa;
10417
10418 offset = area_bottom < 0 ? -area_bottom : 0;
10419 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
10420
10421 cfa = gen_rtx_SET (frame_pointer,
10422 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
10423 if (DISP_IN_RANGE (INTVAL (frame_off)))
10424 {
10425 insn = gen_rtx_SET (frame_pointer,
10426 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
10427 insn = emit_insn (insn);
10428 }
10429 else
10430 {
10431 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
10432 frame_off = force_const_mem (Pmode, frame_off);
10433
10434 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
10435 annotate_constant_pool_refs (&PATTERN (insn));
10436 }
10437 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
10438 RTX_FRAME_RELATED_P (insn) = 1;
10439 }
10440
10441 /* Restore call saved fprs. */
10442
10443 if (TARGET_64BIT)
10444 {
10445 if (cfun_save_high_fprs_p)
10446 {
10447 next_offset = cfun_frame_layout.f8_offset;
10448 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
10449 {
10450 if (cfun_fpr_save_p (i))
10451 {
10452 restore_fpr (frame_pointer,
10453 offset + next_offset, i);
10454 cfa_restores
10455 = alloc_reg_note (REG_CFA_RESTORE,
10456 gen_rtx_REG (DFmode, i), cfa_restores);
10457 next_offset += 8;
10458 }
10459 }
10460 }
10461
10462 }
10463 else
10464 {
10465 next_offset = cfun_frame_layout.f4_offset;
10466 /* f4, f6 */
10467 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
10468 {
10469 if (cfun_fpr_save_p (i))
10470 {
10471 restore_fpr (frame_pointer,
10472 offset + next_offset, i);
10473 cfa_restores
10474 = alloc_reg_note (REG_CFA_RESTORE,
10475 gen_rtx_REG (DFmode, i), cfa_restores);
10476 next_offset += 8;
10477 }
10478 else if (!TARGET_PACKED_STACK)
10479 next_offset += 8;
10480 }
10481
10482 }
10483
10484 /* Return register. */
10485
10486 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
10487
10488 /* Restore call saved gprs. */
10489
10490 if (cfun_frame_layout.first_restore_gpr != -1)
10491 {
10492 rtx insn, addr;
10493 int i;
10494
10495 /* Check for global registers and save them
10496 to the stack locations from which they get restored. */
10497
10498 for (i = cfun_frame_layout.first_restore_gpr;
10499 i <= cfun_frame_layout.last_restore_gpr;
10500 i++)
10501 {
10502 if (global_not_special_regno_p (i))
10503 {
10504 addr = plus_constant (Pmode, frame_pointer,
10505 offset + cfun_frame_layout.gprs_offset
10506 + (i - cfun_frame_layout.first_save_gpr_slot)
10507 * UNITS_PER_LONG);
10508 addr = gen_rtx_MEM (Pmode, addr);
10509 set_mem_alias_set (addr, get_frame_alias_set ());
10510 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
10511 }
10512 else
10513 cfa_restores
10514 = alloc_reg_note (REG_CFA_RESTORE,
10515 gen_rtx_REG (Pmode, i), cfa_restores);
10516 }
10517
10518 if (! sibcall)
10519 {
10520 /* Fetch return address from stack before load multiple,
10521 this is beneficial for scheduling.
10522
10523 Only do this if we already decided that r14 needs to be
10524 saved to a stack slot. (And not just because r14 happens to
10525 be in between two GPRs which need saving.) Otherwise it
10526 would be difficult to take that decision back in
10527 s390_optimize_prologue. */
10528 if (cfun_gpr_save_slot (RETURN_REGNUM) == -1)
10529 {
10530 int return_regnum = find_unused_clobbered_reg();
10531 if (!return_regnum)
10532 return_regnum = 4;
10533 return_reg = gen_rtx_REG (Pmode, return_regnum);
10534
10535 addr = plus_constant (Pmode, frame_pointer,
10536 offset + cfun_frame_layout.gprs_offset
10537 + (RETURN_REGNUM
10538 - cfun_frame_layout.first_save_gpr_slot)
10539 * UNITS_PER_LONG);
10540 addr = gen_rtx_MEM (Pmode, addr);
10541 set_mem_alias_set (addr, get_frame_alias_set ());
10542 emit_move_insn (return_reg, addr);
10543
10544 /* Once we have done this optimization, we have to make sure
10545 s390_optimize_prologue does not try to remove the
10546 store of r14 since we will not be able to find the
10547 load issued here. */
10548 cfun_frame_layout.save_return_addr_p = true;
10549 }
10550 }
10551
10552 insn = restore_gprs (frame_pointer,
10553 offset + cfun_frame_layout.gprs_offset
10554 + (cfun_frame_layout.first_restore_gpr
10555 - cfun_frame_layout.first_save_gpr_slot)
10556 * UNITS_PER_LONG,
10557 cfun_frame_layout.first_restore_gpr,
10558 cfun_frame_layout.last_restore_gpr);
10559 insn = emit_insn (insn);
10560 REG_NOTES (insn) = cfa_restores;
10561 add_reg_note (insn, REG_CFA_DEF_CFA,
10562 plus_constant (Pmode, stack_pointer_rtx,
10563 STACK_POINTER_OFFSET));
10564 RTX_FRAME_RELATED_P (insn) = 1;
10565 }
10566
10567 s390_restore_gprs_from_fprs ();
10568
10569 if (! sibcall)
10570 {
10571
10572 /* Return to caller. */
10573
10574 p = rtvec_alloc (2);
10575
10576 RTVEC_ELT (p, 0) = ret_rtx;
10577 RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
10578 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
10579 }
10580 }
10581
10582 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
10583
10584 static void
10585 s300_set_up_by_prologue (hard_reg_set_container *regs)
10586 {
10587 if (cfun->machine->base_reg
10588 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
10589 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
10590 }
10591
10592 /* Return true if the function can use simple_return to return outside
10593 of a shrink-wrapped region. At present shrink-wrapping is supported
10594 in all cases. */
10595
10596 bool
10597 s390_can_use_simple_return_insn (void)
10598 {
10599 return true;
10600 }
10601
10602 /* Return true if the epilogue is guaranteed to contain only a return
10603 instruction and if a direct return can therefore be used instead.
10604 One of the main advantages of using direct return instructions
10605 is that we can then use conditional returns. */
10606
10607 bool
10608 s390_can_use_return_insn (void)
10609 {
10610 int i;
10611
10612 if (!reload_completed)
10613 return false;
10614
10615 if (crtl->profile)
10616 return false;
10617
10618 if (TARGET_TPF_PROFILING)
10619 return false;
10620
10621 for (i = 0; i < 16; i++)
10622 if (cfun_gpr_save_slot (i))
10623 return false;
10624
10625 /* For 31 bit this is not covered by the frame_size check below
10626 since f4, f6 are saved in the register save area without needing
10627 additional stack space. */
10628 if (!TARGET_64BIT
10629 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
10630 return false;
10631
10632 if (cfun->machine->base_reg
10633 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
10634 return false;
10635
10636 return cfun_frame_layout.frame_size == 0;
10637 }
10638
10639 /* The VX ABI differs for vararg functions. Therefore we need the
10640 prototype of the callee to be available when passing vector type
10641 values. */
10642 static const char *
10643 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
10644 {
10645 return ((TARGET_VX_ABI
10646 && typelist == 0
10647 && VECTOR_TYPE_P (TREE_TYPE (val))
10648 && (funcdecl == NULL_TREE
10649 || (TREE_CODE (funcdecl) == FUNCTION_DECL
10650 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
10651 ? N_("Vector argument passed to unprototyped function")
10652 : NULL);
10653 }
10654
10655
10656 /* Return the size in bytes of a function argument of
10657 type TYPE and/or mode MODE. At least one of TYPE or
10658 MODE must be specified. */
10659
10660 static int
10661 s390_function_arg_size (machine_mode mode, const_tree type)
10662 {
10663 if (type)
10664 return int_size_in_bytes (type);
10665
10666 /* No type info available for some library calls ... */
10667 if (mode != BLKmode)
10668 return GET_MODE_SIZE (mode);
10669
10670 /* If we have neither type nor mode, abort */
10671 gcc_unreachable ();
10672 }
10673
10674 /* Return true if a function argument of type TYPE and mode MODE
10675 is to be passed in a vector register, if available. */
10676
10677 bool
10678 s390_function_arg_vector (machine_mode mode, const_tree type)
10679 {
10680 if (!TARGET_VX_ABI)
10681 return false;
10682
10683 if (s390_function_arg_size (mode, type) > 16)
10684 return false;
10685
10686 /* No type info available for some library calls ... */
10687 if (!type)
10688 return VECTOR_MODE_P (mode);
10689
10690 /* The ABI says that record types with a single member are treated
10691 just like that member would be. */
10692 while (TREE_CODE (type) == RECORD_TYPE)
10693 {
10694 tree field, single = NULL_TREE;
10695
10696 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
10697 {
10698 if (TREE_CODE (field) != FIELD_DECL)
10699 continue;
10700
10701 if (single == NULL_TREE)
10702 single = TREE_TYPE (field);
10703 else
10704 return false;
10705 }
10706
10707 if (single == NULL_TREE)
10708 return false;
10709 else
10710 {
10711 /* If the field declaration adds extra bytes due to
10712 e.g. padding, this is not accepted as a vector type. */
10713 if (int_size_in_bytes (single) <= 0
10714 || int_size_in_bytes (single) != int_size_in_bytes (type))
10715 return false;
10716 type = single;
10717 }
10718 }
10719
10720 return VECTOR_TYPE_P (type);
10721 }
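/* Illustrative examples of the single-member rule above (assuming the
   vector ABI and a 16-byte vector type V): "struct { V x; }" is passed
   like V itself, "struct { V x; int y; }" is not a vector argument, and
   a struct whose size exceeds that of its single member because of
   padding is rejected by the size comparison above.  */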
10722
10723 /* Return true if a function argument of type TYPE and mode MODE
10724 is to be passed in a floating-point register, if available. */
10725
10726 static bool
10727 s390_function_arg_float (machine_mode mode, const_tree type)
10728 {
10729 if (s390_function_arg_size (mode, type) > 8)
10730 return false;
10731
10732 /* Soft-float changes the ABI: no floating-point registers are used. */
10733 if (TARGET_SOFT_FLOAT)
10734 return false;
10735
10736 /* No type info available for some library calls ... */
10737 if (!type)
10738 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
10739
10740 /* The ABI says that record types with a single member are treated
10741 just like that member would be. */
10742 while (TREE_CODE (type) == RECORD_TYPE)
10743 {
10744 tree field, single = NULL_TREE;
10745
10746 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
10747 {
10748 if (TREE_CODE (field) != FIELD_DECL)
10749 continue;
10750
10751 if (single == NULL_TREE)
10752 single = TREE_TYPE (field);
10753 else
10754 return false;
10755 }
10756
10757 if (single == NULL_TREE)
10758 return false;
10759 else
10760 type = single;
10761 }
10762
10763 return TREE_CODE (type) == REAL_TYPE;
10764 }
10765
10766 /* Return true if a function argument of type TYPE and mode MODE
10767 is to be passed in an integer register, or a pair of integer
10768 registers, if available. */
10769
10770 static bool
10771 s390_function_arg_integer (machine_mode mode, const_tree type)
10772 {
10773 int size = s390_function_arg_size (mode, type);
10774 if (size > 8)
10775 return false;
10776
10777 /* No type info available for some library calls ... */
10778 if (!type)
10779 return GET_MODE_CLASS (mode) == MODE_INT
10780 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
10781
10782 /* We accept small integral (and similar) types. */
10783 if (INTEGRAL_TYPE_P (type)
10784 || POINTER_TYPE_P (type)
10785 || TREE_CODE (type) == NULLPTR_TYPE
10786 || TREE_CODE (type) == OFFSET_TYPE
10787 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
10788 return true;
10789
10790 /* We also accept structs of size 1, 2, 4, 8 that are not
10791 passed in floating-point registers. */
10792 if (AGGREGATE_TYPE_P (type)
10793 && exact_log2 (size) >= 0
10794 && !s390_function_arg_float (mode, type))
10795 return true;
10796
10797 return false;
10798 }
10799
10800 /* Return 1 if a function argument of type TYPE and mode MODE
10801 is to be passed by reference. The ABI specifies that only
10802 structures of size 1, 2, 4, or 8 bytes are passed by value,
10803 all other structures (and complex numbers) are passed by
10804 reference. */
10805
10806 static bool
10807 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
10808 machine_mode mode, const_tree type,
10809 bool named ATTRIBUTE_UNUSED)
10810 {
10811 int size = s390_function_arg_size (mode, type);
10812
10813 if (s390_function_arg_vector (mode, type))
10814 return false;
10815
10816 if (size > 8)
10817 return true;
10818
10819 if (type)
10820 {
10821 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
10822 return true;
10823
10824 if (TREE_CODE (type) == COMPLEX_TYPE
10825 || TREE_CODE (type) == VECTOR_TYPE)
10826 return true;
10827 }
10828
10829 return false;
10830 }
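/* A small illustration of the rules above (not an exhaustive list):
   "struct { int a; }" (size 4, a power of two) is passed by value in a
   GPR, "struct { char a, b, c; }" (size 3) is passed by reference, and
   any struct larger than 8 bytes is passed by reference as well.  */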
10831
10832 /* Update the data in CUM to advance over an argument of mode MODE and
10833 data type TYPE. (TYPE is null for libcalls where that information
10834 may not be available.) The boolean NAMED specifies whether the
10835 argument is a named argument (as opposed to an unnamed argument
10836 matching an ellipsis). */
10837
10838 static void
10839 s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
10840 const_tree type, bool named)
10841 {
10842 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10843
10844 if (s390_function_arg_vector (mode, type))
10845 {
10846 /* We are called for unnamed vector stdarg arguments which are
10847 passed on the stack. In this case this hook does not have to
10848 do anything since stack arguments are tracked by common
10849 code. */
10850 if (!named)
10851 return;
10852 cum->vrs += 1;
10853 }
10854 else if (s390_function_arg_float (mode, type))
10855 {
10856 cum->fprs += 1;
10857 }
10858 else if (s390_function_arg_integer (mode, type))
10859 {
10860 int size = s390_function_arg_size (mode, type);
10861 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
10862 }
10863 else
10864 gcc_unreachable ();
10865 }
10866
10867 /* Define where to put the arguments to a function.
10868 Value is zero to push the argument on the stack,
10869 or a hard register in which to store the argument.
10870
10871 MODE is the argument's machine mode.
10872 TYPE is the data type of the argument (as a tree).
10873 This is null for libcalls where that information may
10874 not be available.
10875 CUM is a variable of type CUMULATIVE_ARGS which gives info about
10876 the preceding args and about the function being called.
10877 NAMED is nonzero if this argument is a named parameter
10878 (otherwise it is an extra parameter matching an ellipsis).
10879
10880 On S/390, we use general purpose registers 2 through 6 to
10881 pass integer, pointer, and certain structure arguments, and
10882 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
10883 to pass floating point arguments. All remaining arguments
10884 are pushed to the stack. */
10885
10886 static rtx
10887 s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
10888 const_tree type, bool named)
10889 {
10890 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10891
10892
10893 if (s390_function_arg_vector (mode, type))
10894 {
10895 /* Vector arguments that are part of the ellipsis are passed on the
10896 stack. */
10897 if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
10898 return NULL_RTX;
10899
10900 return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
10901 }
10902 else if (s390_function_arg_float (mode, type))
10903 {
10904 if (cum->fprs + 1 > FP_ARG_NUM_REG)
10905 return NULL_RTX;
10906 else
10907 return gen_rtx_REG (mode, cum->fprs + 16);
10908 }
10909 else if (s390_function_arg_integer (mode, type))
10910 {
10911 int size = s390_function_arg_size (mode, type);
10912 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
10913
10914 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
10915 return NULL_RTX;
10916 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
10917 return gen_rtx_REG (mode, cum->gprs + 2);
10918 else if (n_gprs == 2)
10919 {
10920 rtvec p = rtvec_alloc (2);
10921
10922 RTVEC_ELT (p, 0)
10923 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
10924 const0_rtx);
10925 RTVEC_ELT (p, 1)
10926 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
10927 GEN_INT (4));
10928
10929 return gen_rtx_PARALLEL (mode, p);
10930 }
10931 }
10932
10933 /* After the real arguments, expand_call calls us once again
10934 with a void_type_node type. Whatever we return here is
10935 passed as operand 2 to the call expanders.
10936
10937 We don't need this feature ... */
10938 else if (type == void_type_node)
10939 return const0_rtx;
10940
10941 gcc_unreachable ();
10942 }
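/* A hypothetical call used purely to illustrate the conventions above
   (64 bit, hard float, no vector arguments): for
   "void f (int a, double b, void *c, long long d)" the arguments are
   assigned a -> %r2, b -> %f0, c -> %r3 and d -> %r4; the next double
   argument would go into %f2.  */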
10943
10944 /* Return true if return values of type TYPE should be returned
10945 in a memory buffer whose address is passed by the caller as
10946 hidden first argument. */
10947
10948 static bool
10949 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
10950 {
10951 /* We accept small integral (and similar) types. */
10952 if (INTEGRAL_TYPE_P (type)
10953 || POINTER_TYPE_P (type)
10954 || TREE_CODE (type) == OFFSET_TYPE
10955 || TREE_CODE (type) == REAL_TYPE)
10956 return int_size_in_bytes (type) > 8;
10957
10958 /* vector types which fit into a VR. */
10959 if (TARGET_VX_ABI
10960 && VECTOR_TYPE_P (type)
10961 && int_size_in_bytes (type) <= 16)
10962 return false;
10963
10964 /* Aggregates and similar constructs are always returned
10965 in memory. */
10966 if (AGGREGATE_TYPE_P (type)
10967 || TREE_CODE (type) == COMPLEX_TYPE
10968 || VECTOR_TYPE_P (type))
10969 return true;
10970
10971 /* ??? We get called on all sorts of random stuff from
10972 aggregate_value_p. We can't abort, but it's not clear
10973 what's safe to return. Pretend it's a struct I guess. */
10974 return true;
10975 }
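/* A few illustrative cases for the rules above: a "long" or "double"
   result comes back in a register, a 16-byte vector result comes back
   in a VR when the vector ABI is in effect, and "struct { int a, b; }"
   (like any aggregate) is returned through a hidden pointer supplied by
   the caller.  */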
10976
10977 /* Function arguments and return values are promoted to word size. */
10978
10979 static machine_mode
10980 s390_promote_function_mode (const_tree type, machine_mode mode,
10981 int *punsignedp,
10982 const_tree fntype ATTRIBUTE_UNUSED,
10983 int for_return ATTRIBUTE_UNUSED)
10984 {
10985 if (INTEGRAL_MODE_P (mode)
10986 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
10987 {
10988 if (type != NULL_TREE && POINTER_TYPE_P (type))
10989 *punsignedp = POINTERS_EXTEND_UNSIGNED;
10990 return Pmode;
10991 }
10992
10993 return mode;
10994 }
10995
10996 /* Define where to return a (scalar) value of type RET_TYPE.
10997 If RET_TYPE is null, define where to return a (scalar)
10998 value of mode MODE from a libcall. */
10999
11000 static rtx
11001 s390_function_and_libcall_value (machine_mode mode,
11002 const_tree ret_type,
11003 const_tree fntype_or_decl,
11004 bool outgoing ATTRIBUTE_UNUSED)
11005 {
11006 /* For vector return types it is important to use the RET_TYPE
11007 argument whenever available since the middle-end might have
11008 changed the mode to a scalar mode. */
11009 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
11010 || (!ret_type && VECTOR_MODE_P (mode)));
11011
11012 /* For normal functions perform the promotion as
11013 promote_function_mode would do. */
11014 if (ret_type)
11015 {
11016 int unsignedp = TYPE_UNSIGNED (ret_type);
11017 mode = promote_function_mode (ret_type, mode, &unsignedp,
11018 fntype_or_decl, 1);
11019 }
11020
11021 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
11022 || SCALAR_FLOAT_MODE_P (mode)
11023 || (TARGET_VX_ABI && vector_ret_type_p));
11024 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
11025
11026 if (TARGET_VX_ABI && vector_ret_type_p)
11027 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
11028 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
11029 return gen_rtx_REG (mode, 16);
11030 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
11031 || UNITS_PER_LONG == UNITS_PER_WORD)
11032 return gen_rtx_REG (mode, 2);
11033 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
11034 {
11035 /* This case is triggered when returning a 64 bit value with
11036 -m31 -mzarch. Although the value would fit into a single
11037 register it has to be forced into a 32 bit register pair in
11038 order to match the ABI. */
11039 rtvec p = rtvec_alloc (2);
11040
11041 RTVEC_ELT (p, 0)
11042 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
11043 RTVEC_ELT (p, 1)
11044 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
11045
11046 return gen_rtx_PARALLEL (mode, p);
11047 }
11048
11049 gcc_unreachable ();
11050 }
11051
11052 /* Define where to return a scalar return value of type RET_TYPE. */
11053
11054 static rtx
11055 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
11056 bool outgoing)
11057 {
11058 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
11059 fn_decl_or_type, outgoing);
11060 }
11061
11062 /* Define where to return a scalar libcall return value of mode
11063 MODE. */
11064
11065 static rtx
11066 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
11067 {
11068 return s390_function_and_libcall_value (mode, NULL_TREE,
11069 NULL_TREE, true);
11070 }
11071
11072
11073 /* Create and return the va_list datatype.
11074
11075 On S/390, va_list is an array type equivalent to
11076
11077 typedef struct __va_list_tag
11078 {
11079 long __gpr;
11080 long __fpr;
11081 void *__overflow_arg_area;
11082 void *__reg_save_area;
11083 } va_list[1];
11084
11085 where __gpr and __fpr hold the number of general purpose
11086 or floating point arguments used up to now, respectively,
11087 __overflow_arg_area points to the stack location of the
11088 next argument passed on the stack, and __reg_save_area
11089 always points to the start of the register area in the
11090 call frame of the current function. The function prologue
11091 saves all registers used for argument passing into this
11092 area if the function uses variable arguments. */
11093
11094 static tree
11095 s390_build_builtin_va_list (void)
11096 {
11097 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
11098
11099 record = lang_hooks.types.make_type (RECORD_TYPE);
11100
11101 type_decl =
11102 build_decl (BUILTINS_LOCATION,
11103 TYPE_DECL, get_identifier ("__va_list_tag"), record);
11104
11105 f_gpr = build_decl (BUILTINS_LOCATION,
11106 FIELD_DECL, get_identifier ("__gpr"),
11107 long_integer_type_node);
11108 f_fpr = build_decl (BUILTINS_LOCATION,
11109 FIELD_DECL, get_identifier ("__fpr"),
11110 long_integer_type_node);
11111 f_ovf = build_decl (BUILTINS_LOCATION,
11112 FIELD_DECL, get_identifier ("__overflow_arg_area"),
11113 ptr_type_node);
11114 f_sav = build_decl (BUILTINS_LOCATION,
11115 FIELD_DECL, get_identifier ("__reg_save_area"),
11116 ptr_type_node);
11117
11118 va_list_gpr_counter_field = f_gpr;
11119 va_list_fpr_counter_field = f_fpr;
11120
11121 DECL_FIELD_CONTEXT (f_gpr) = record;
11122 DECL_FIELD_CONTEXT (f_fpr) = record;
11123 DECL_FIELD_CONTEXT (f_ovf) = record;
11124 DECL_FIELD_CONTEXT (f_sav) = record;
11125
11126 TYPE_STUB_DECL (record) = type_decl;
11127 TYPE_NAME (record) = type_decl;
11128 TYPE_FIELDS (record) = f_gpr;
11129 DECL_CHAIN (f_gpr) = f_fpr;
11130 DECL_CHAIN (f_fpr) = f_ovf;
11131 DECL_CHAIN (f_ovf) = f_sav;
11132
11133 layout_type (record);
11134
11135 /* The correct type is an array type of one element. */
11136 return build_array_type (record, build_index_type (size_zero_node));
11137 }
11138
11139 /* Implement va_start by filling the va_list structure VALIST.
11140 STDARG_P is always true, and ignored.
11141 NEXTARG points to the first anonymous stack argument.
11142
11143 The following global variables are used to initialize
11144 the va_list structure:
11145
11146 crtl->args.info:
11147 holds number of gprs and fprs used for named arguments.
11148 crtl->args.arg_offset_rtx:
11149 holds the offset of the first anonymous stack argument
11150 (relative to the virtual arg pointer). */
11151
11152 static void
11153 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
11154 {
11155 HOST_WIDE_INT n_gpr, n_fpr;
11156 int off;
11157 tree f_gpr, f_fpr, f_ovf, f_sav;
11158 tree gpr, fpr, ovf, sav, t;
11159
11160 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11161 f_fpr = DECL_CHAIN (f_gpr);
11162 f_ovf = DECL_CHAIN (f_fpr);
11163 f_sav = DECL_CHAIN (f_ovf);
11164
11165 valist = build_simple_mem_ref (valist);
11166 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11167 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
11168 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
11169 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
11170
11171 /* Count number of gp and fp argument registers used. */
11172
11173 n_gpr = crtl->args.info.gprs;
11174 n_fpr = crtl->args.info.fprs;
11175
11176 if (cfun->va_list_gpr_size)
11177 {
11178 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11179 build_int_cst (NULL_TREE, n_gpr));
11180 TREE_SIDE_EFFECTS (t) = 1;
11181 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11182 }
11183
11184 if (cfun->va_list_fpr_size)
11185 {
11186 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11187 build_int_cst (NULL_TREE, n_fpr));
11188 TREE_SIDE_EFFECTS (t) = 1;
11189 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11190 }
11191
11192 /* Find the overflow area.
11193 FIXME: This currently is too pessimistic when the vector ABI is
11194 enabled. In that case we *always* set up the overflow area
11195 pointer. */
11196 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
11197 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
11198 || TARGET_VX_ABI)
11199 {
11200 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
11201
11202 off = INTVAL (crtl->args.arg_offset_rtx);
11203 off = off < 0 ? 0 : off;
11204 if (TARGET_DEBUG_ARG)
11205 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
11206 (int)n_gpr, (int)n_fpr, off);
11207
11208 t = fold_build_pointer_plus_hwi (t, off);
11209
11210 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
11211 TREE_SIDE_EFFECTS (t) = 1;
11212 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11213 }
11214
11215 /* Find the register save area. */
11216 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
11217 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
11218 {
11219 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
11220 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
11221
11222 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
11223 TREE_SIDE_EFFECTS (t) = 1;
11224 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11225 }
11226 }
11227
11228 /* Implement va_arg by updating the va_list structure
11229 VALIST as required to retrieve an argument of type
11230 TYPE, and returning that argument.
11231
11232 Generates code equivalent to:
11233
11234 if (integral value) {
11235 if (size <= 4 && args.gpr < 5 ||
11236 size > 4 && args.gpr < 4 )
11237 ret = args.reg_save_area[args.gpr+8]
11238 else
11239 ret = *args.overflow_arg_area++;
11240 } else if (vector value) {
11241 ret = *args.overflow_arg_area;
11242 args.overflow_arg_area += size / 8;
11243 } else if (float value) {
11244 if (args.fpr < 2)
11245 ret = args.reg_save_area[args.fpr+64]
11246 else
11247 ret = *args.overflow_arg_area++;
11248 } else if (aggregate value) {
11249 if (args.gpr < 5)
11250 ret = *args.reg_save_area[args.gpr]
11251 else
11252 ret = **args.overflow_arg_area++;
11253 } */
11254
11255 static tree
11256 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
11257 gimple_seq *post_p ATTRIBUTE_UNUSED)
11258 {
11259 tree f_gpr, f_fpr, f_ovf, f_sav;
11260 tree gpr, fpr, ovf, sav, reg, t, u;
11261 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
11262 tree lab_false, lab_over;
11263 tree addr = create_tmp_var (ptr_type_node, "addr");
11264 bool left_align_p; /* Whether a value < UNITS_PER_LONG is left-aligned
11265 within its stack slot. */
11266
11267 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11268 f_fpr = DECL_CHAIN (f_gpr);
11269 f_ovf = DECL_CHAIN (f_fpr);
11270 f_sav = DECL_CHAIN (f_ovf);
11271
11272 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11273 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
11274 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
11275
11276 /* The tree for args* cannot be shared between gpr/fpr and ovf since
11277 both appear on a lhs. */
11278 valist = unshare_expr (valist);
11279 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
11280
11281 size = int_size_in_bytes (type);
11282
11283 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
11284 {
11285 if (TARGET_DEBUG_ARG)
11286 {
11287 fprintf (stderr, "va_arg: aggregate type");
11288 debug_tree (type);
11289 }
11290
11291 /* Aggregates are passed by reference. */
11292 indirect_p = 1;
11293 reg = gpr;
11294 n_reg = 1;
11295
11296 /* Kernel stack layout on 31 bit: It is assumed here that no padding
11297 will be added by s390_frame_info because for va_args an even
11298 number of GPRs always has to be saved (r15-r2 = 14 regs). */
11299 sav_ofs = 2 * UNITS_PER_LONG;
11300 sav_scale = UNITS_PER_LONG;
11301 size = UNITS_PER_LONG;
11302 max_reg = GP_ARG_NUM_REG - n_reg;
11303 left_align_p = false;
11304 }
11305 else if (s390_function_arg_vector (TYPE_MODE (type), type))
11306 {
11307 if (TARGET_DEBUG_ARG)
11308 {
11309 fprintf (stderr, "va_arg: vector type");
11310 debug_tree (type);
11311 }
11312
11313 indirect_p = 0;
11314 reg = NULL_TREE;
11315 n_reg = 0;
11316 sav_ofs = 0;
11317 sav_scale = 8;
11318 max_reg = 0;
11319 left_align_p = true;
11320 }
11321 else if (s390_function_arg_float (TYPE_MODE (type), type))
11322 {
11323 if (TARGET_DEBUG_ARG)
11324 {
11325 fprintf (stderr, "va_arg: float type");
11326 debug_tree (type);
11327 }
11328
11329 /* FP args go in FP registers, if present. */
11330 indirect_p = 0;
11331 reg = fpr;
11332 n_reg = 1;
11333 sav_ofs = 16 * UNITS_PER_LONG;
11334 sav_scale = 8;
11335 max_reg = FP_ARG_NUM_REG - n_reg;
11336 left_align_p = false;
11337 }
11338 else
11339 {
11340 if (TARGET_DEBUG_ARG)
11341 {
11342 fprintf (stderr, "va_arg: other type");
11343 debug_tree (type);
11344 }
11345
11346 /* Otherwise into GP registers. */
11347 indirect_p = 0;
11348 reg = gpr;
11349 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
11350
11351 /* Kernel stack layout on 31 bit: It is assumed here that no padding
11352 will be added by s390_frame_info because for va_args an even
11353 number of GPRs always has to be saved (r15-r2 = 14 regs). */
11354 sav_ofs = 2 * UNITS_PER_LONG;
11355
11356 if (size < UNITS_PER_LONG)
11357 sav_ofs += UNITS_PER_LONG - size;
11358
11359 sav_scale = UNITS_PER_LONG;
11360 max_reg = GP_ARG_NUM_REG - n_reg;
11361 left_align_p = false;
11362 }
11363
11364 /* Pull the value out of the saved registers ... */
11365
11366 if (reg != NULL_TREE)
11367 {
11368 /*
11369 if (reg > ((typeof (reg))max_reg))
11370 goto lab_false;
11371
11372 addr = sav + sav_ofs + reg * sav_scale;
11373
11374 goto lab_over;
11375
11376 lab_false:
11377 */
11378
11379 lab_false = create_artificial_label (UNKNOWN_LOCATION);
11380 lab_over = create_artificial_label (UNKNOWN_LOCATION);
11381
11382 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
11383 t = build2 (GT_EXPR, boolean_type_node, reg, t);
11384 u = build1 (GOTO_EXPR, void_type_node, lab_false);
11385 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
11386 gimplify_and_add (t, pre_p);
11387
11388 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
11389 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
11390 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
11391 t = fold_build_pointer_plus (t, u);
11392
11393 gimplify_assign (addr, t, pre_p);
11394
11395 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
11396
11397 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
11398 }
11399
11400 /* ... Otherwise out of the overflow area. */
11401
11402 t = ovf;
11403 if (size < UNITS_PER_LONG && !left_align_p)
11404 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
11405
11406 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
11407
11408 gimplify_assign (addr, t, pre_p);
11409
11410 if (size < UNITS_PER_LONG && left_align_p)
11411 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
11412 else
11413 t = fold_build_pointer_plus_hwi (t, size);
11414
11415 gimplify_assign (ovf, t, pre_p);
11416
11417 if (reg != NULL_TREE)
11418 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
11419
11420
11421 /* Increment register save count. */
11422
11423 if (n_reg > 0)
11424 {
11425 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
11426 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
11427 gimplify_and_add (u, pre_p);
11428 }
11429
11430 if (indirect_p)
11431 {
11432 t = build_pointer_type_for_mode (build_pointer_type (type),
11433 ptr_mode, true);
11434 addr = fold_convert (t, addr);
11435 addr = build_va_arg_indirect_ref (addr);
11436 }
11437 else
11438 {
11439 t = build_pointer_type_for_mode (type, ptr_mode, true);
11440 addr = fold_convert (t, addr);
11441 }
11442
11443 return build_va_arg_indirect_ref (addr);
11444 }
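
/* Informal sketch (not literal GIMPLE) of the sequence gimplified above
   for a "double" argument with the 64-bit ABI; gpr, fpr, ovf and sav
   refer to the va_list fields used above:

     if (fpr > FP_ARG_NUM_REG - 1) goto lab_false;
     addr = sav + 16 * UNITS_PER_LONG + fpr * 8;
     goto lab_over;
   lab_false:                        // FP argument registers exhausted
     addr = ovf;
     ovf = ovf + 8;
   lab_over:
     fpr = fpr + 1;
     result = *(double *) addr;  */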
11445
11446 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
11447 expanders.
11448 DEST - Register location where CC will be stored.
11449 TDB - Pointer to a 256 byte area in which to store the transaction
11450 diagnostic block. NULL if TDB is not needed.
11451 RETRY - Retry count value. If non-NULL, a retry loop for CC2
11452 is emitted.
11453 CLOBBER_FPRS_P - If true, clobbers for all FPRs are emitted as part
11454 of the tbegin instruction pattern. */
11455
11456 void
11457 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
11458 {
11459 rtx retry_plus_two = gen_reg_rtx (SImode);
11460 rtx retry_reg = gen_reg_rtx (SImode);
11461 rtx_code_label *retry_label = NULL;
11462
11463 if (retry != NULL_RTX)
11464 {
11465 emit_move_insn (retry_reg, retry);
11466 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
11467 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
11468 retry_label = gen_label_rtx ();
11469 emit_label (retry_label);
11470 }
11471
11472 if (clobber_fprs_p)
11473 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), tdb));
11474 else
11475 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
11476 tdb));
11477
11478 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
11479 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
11480 CC_REGNUM)),
11481 UNSPEC_CC_TO_INT));
11482 if (retry != NULL_RTX)
11483 {
11484 const int CC0 = 1 << 3;
11485 const int CC1 = 1 << 2;
11486 const int CC3 = 1 << 0;
11487 rtx jump;
11488 rtx count = gen_reg_rtx (SImode);
11489 rtx_code_label *leave_label = gen_label_rtx ();
11490
11491 /* Exit for success and permanent failures. */
11492 jump = s390_emit_jump (leave_label,
11493 gen_rtx_EQ (VOIDmode,
11494 gen_rtx_REG (CCRAWmode, CC_REGNUM),
11495 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
11496 LABEL_NUSES (leave_label) = 1;
11497
11498 /* CC2 - transient failure. Perform retry with ppa. */
11499 emit_move_insn (count, retry_plus_two);
11500 emit_insn (gen_subsi3 (count, count, retry_reg));
11501 emit_insn (gen_tx_assist (count));
11502 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
11503 retry_reg,
11504 retry_reg));
11505 JUMP_LABEL (jump) = retry_label;
11506 LABEL_NUSES (retry_label) = 1;
11507 emit_label (leave_label);
11508 }
11509 }
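
/* Informal sketch of the control flow emitted above for the retry case
   (RETRY != NULL_RTX); this is a rough picture, not the exact RTL:

     retry_reg = RETRY + 1;
   retry_label:
     tbegin;                          // with or without FPR clobbers
     DEST = CC;                       // via UNSPEC_CC_TO_INT
     if (CC is 0, 1 or 3)             // success or persistent failure
       goto leave_label;
     ppa ((RETRY + 2) - retry_reg);   // processor assist with attempt count
     if (--retry_reg != 0)            // branch-on-count back to the tbegin
       goto retry_label;
   leave_label:                                                           */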
11510
11511
11512 /* Return the decl for the target specific builtin with the function
11513 code FCODE. */
11514
11515 static tree
11516 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
11517 {
11518 if (fcode >= S390_BUILTIN_MAX)
11519 return error_mark_node;
11520
11521 return s390_builtin_decls[fcode];
11522 }
11523
11524 /* We call mcount before the function prologue. So a profiled leaf
11525 function should stay a leaf function. */
11526
11527 static bool
11528 s390_keep_leaf_when_profiled ()
11529 {
11530 return true;
11531 }
11532
11533 /* Output assembly code for the trampoline template to
11534 stdio stream FILE.
11535
11536 On S/390, we use gpr 1 internally in the trampoline code;
11537 gpr 0 is used to hold the static chain. */
11538
11539 static void
11540 s390_asm_trampoline_template (FILE *file)
11541 {
11542 rtx op[2];
11543 op[0] = gen_rtx_REG (Pmode, 0);
11544 op[1] = gen_rtx_REG (Pmode, 1);
11545
11546 if (TARGET_64BIT)
11547 {
11548 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
11549 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
11550 output_asm_insn ("br\t%1", op); /* 2 byte */
11551 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
11552 }
11553 else
11554 {
11555 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
11556 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
11557 output_asm_insn ("br\t%1", op); /* 2 byte */
11558 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
11559 }
11560 }
11561
11562 /* Emit RTL insns to initialize the variable parts of a trampoline.
11563 FNADDR is an RTX for the address of the function's pure code.
11564 CXT is an RTX for the static chain value for the function. */
11565
11566 static void
11567 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
11568 {
11569 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11570 rtx mem;
11571
11572 emit_block_move (m_tramp, assemble_trampoline_template (),
11573 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
11574
11575 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
11576 emit_move_insn (mem, cxt);
11577 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
11578 emit_move_insn (mem, fnaddr);
11579 }
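
/* Resulting 64-bit trampoline layout (a sketch; byte offsets assume
   UNITS_PER_LONG == 8):

     0:  basr %r1,0            ; %r1 := address of the next insn (offset 2)
     2:  lmg  %r0,%r1,14(%r1)  ; loads the two words at offsets 16 and 24
     8:  br   %r1              ; jump to the target function
    16:  <static chain>        ; CXT, stored by s390_trampoline_init
    24:  <function address>    ; FNADDR, stored by s390_trampoline_init

   so %r0 ends up holding the static chain and %r1 the target address,
   matching the comment above s390_asm_trampoline_template.  */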
11580
11581 /* Output assembler code to FILE to increment profiler label # LABELNO
11582 for profiling a function entry. */
11583
11584 void
11585 s390_function_profiler (FILE *file, int labelno)
11586 {
11587 rtx op[7];
11588
11589 char label[128];
11590 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
11591
11592 fprintf (file, "# function profiler \n");
11593
11594 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
11595 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
11596 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
11597
11598 op[2] = gen_rtx_REG (Pmode, 1);
11599 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
11600 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
11601
11602 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
11603 if (flag_pic)
11604 {
11605 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
11606 op[4] = gen_rtx_CONST (Pmode, op[4]);
11607 }
11608
11609 if (TARGET_64BIT)
11610 {
11611 output_asm_insn ("stg\t%0,%1", op);
11612 output_asm_insn ("larl\t%2,%3", op);
11613 output_asm_insn ("brasl\t%0,%4", op);
11614 output_asm_insn ("lg\t%0,%1", op);
11615 }
11616 else if (!flag_pic)
11617 {
11618 op[6] = gen_label_rtx ();
11619
11620 output_asm_insn ("st\t%0,%1", op);
11621 output_asm_insn ("bras\t%2,%l6", op);
11622 output_asm_insn (".long\t%4", op);
11623 output_asm_insn (".long\t%3", op);
11624 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
11625 output_asm_insn ("l\t%0,0(%2)", op);
11626 output_asm_insn ("l\t%2,4(%2)", op);
11627 output_asm_insn ("basr\t%0,%0", op);
11628 output_asm_insn ("l\t%0,%1", op);
11629 }
11630 else
11631 {
11632 op[5] = gen_label_rtx ();
11633 op[6] = gen_label_rtx ();
11634
11635 output_asm_insn ("st\t%0,%1", op);
11636 output_asm_insn ("bras\t%2,%l6", op);
11637 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
11638 output_asm_insn (".long\t%4-%l5", op);
11639 output_asm_insn (".long\t%3-%l5", op);
11640 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
11641 output_asm_insn ("lr\t%0,%2", op);
11642 output_asm_insn ("a\t%0,0(%2)", op);
11643 output_asm_insn ("a\t%2,4(%2)", op);
11644 output_asm_insn ("basr\t%0,%0", op);
11645 output_asm_insn ("l\t%0,%1", op);
11646 }
11647 }
11648
11649 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
11650 into its SYMBOL_REF_FLAGS. */
11651
11652 static void
11653 s390_encode_section_info (tree decl, rtx rtl, int first)
11654 {
11655 default_encode_section_info (decl, rtl, first);
11656
11657 if (TREE_CODE (decl) == VAR_DECL)
11658 {
11659 /* If a variable has a forced alignment to < 2 bytes, mark it
11660 with SYMBOL_FLAG_ALIGN1 to prevent it from being used as a LARL
11661 operand. */
11662 if (DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 16)
11663 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
11664 if (!DECL_SIZE (decl)
11665 || !DECL_ALIGN (decl)
11666 || !tree_fits_shwi_p (DECL_SIZE (decl))
11667 || (DECL_ALIGN (decl) <= 64
11668 && DECL_ALIGN (decl) != tree_to_shwi (DECL_SIZE (decl))))
11669 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
11670 }
11671
11672 /* Literal pool references don't have a decl so they are handled
11673 differently here. We rely on the information in the MEM_ALIGN
11674 entry to decide upon natural alignment. */
11675 if (MEM_P (rtl)
11676 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
11677 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0))
11678 && (MEM_ALIGN (rtl) == 0
11679 || GET_MODE_BITSIZE (GET_MODE (rtl)) == 0
11680 || MEM_ALIGN (rtl) < GET_MODE_BITSIZE (GET_MODE (rtl))))
11681 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
11682 }
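
/* Example (informal): for a hypothetical declaration such as

     char buf[3] __attribute__ ((aligned (1)));

   DECL_ALIGN is 8 bits, so the symbol gets SYMBOL_FLAG_ALIGN1 (it may
   sit on an odd address and must not be used as a LARL operand) and
   SYMBOL_FLAG_NOT_NATURALLY_ALIGNED (its 8-bit alignment differs from
   its 24-bit size).  */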
11683
11684 /* Output thunk to FILE that implements a C++ virtual function call (with
11685 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
11686 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
11687 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
11688 relative to the resulting this pointer. */
11689
11690 static void
11691 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
11692 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11693 tree function)
11694 {
11695 rtx op[10];
11696 int nonlocal = 0;
11697
11698 /* Make sure unwind info is emitted for the thunk if needed. */
11699 final_start_function (emit_barrier (), file, 1);
11700
11701 /* Operand 0 is the target function. */
11702 op[0] = XEXP (DECL_RTL (function), 0);
11703 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
11704 {
11705 nonlocal = 1;
11706 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
11707 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
11708 op[0] = gen_rtx_CONST (Pmode, op[0]);
11709 }
11710
11711 /* Operand 1 is the 'this' pointer. */
11712 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11713 op[1] = gen_rtx_REG (Pmode, 3);
11714 else
11715 op[1] = gen_rtx_REG (Pmode, 2);
11716
11717 /* Operand 2 is the delta. */
11718 op[2] = GEN_INT (delta);
11719
11720 /* Operand 3 is the vcall_offset. */
11721 op[3] = GEN_INT (vcall_offset);
11722
11723 /* Operand 4 is the temporary register. */
11724 op[4] = gen_rtx_REG (Pmode, 1);
11725
11726 /* Operands 5 to 8 can be used as labels. */
11727 op[5] = NULL_RTX;
11728 op[6] = NULL_RTX;
11729 op[7] = NULL_RTX;
11730 op[8] = NULL_RTX;
11731
11732 /* Operand 9 can be used for temporary register. */
11733 op[9] = NULL_RTX;
11734
11735 /* Generate code. */
11736 if (TARGET_64BIT)
11737 {
11738 /* Set up literal pool pointer if required. */
11739 if ((!DISP_IN_RANGE (delta)
11740 && !CONST_OK_FOR_K (delta)
11741 && !CONST_OK_FOR_Os (delta))
11742 || (!DISP_IN_RANGE (vcall_offset)
11743 && !CONST_OK_FOR_K (vcall_offset)
11744 && !CONST_OK_FOR_Os (vcall_offset)))
11745 {
11746 op[5] = gen_label_rtx ();
11747 output_asm_insn ("larl\t%4,%5", op);
11748 }
11749
11750 /* Add DELTA to this pointer. */
11751 if (delta)
11752 {
11753 if (CONST_OK_FOR_J (delta))
11754 output_asm_insn ("la\t%1,%2(%1)", op);
11755 else if (DISP_IN_RANGE (delta))
11756 output_asm_insn ("lay\t%1,%2(%1)", op);
11757 else if (CONST_OK_FOR_K (delta))
11758 output_asm_insn ("aghi\t%1,%2", op);
11759 else if (CONST_OK_FOR_Os (delta))
11760 output_asm_insn ("agfi\t%1,%2", op);
11761 else
11762 {
11763 op[6] = gen_label_rtx ();
11764 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
11765 }
11766 }
11767
11768 /* Perform vcall adjustment. */
11769 if (vcall_offset)
11770 {
11771 if (DISP_IN_RANGE (vcall_offset))
11772 {
11773 output_asm_insn ("lg\t%4,0(%1)", op);
11774 output_asm_insn ("ag\t%1,%3(%4)", op);
11775 }
11776 else if (CONST_OK_FOR_K (vcall_offset))
11777 {
11778 output_asm_insn ("lghi\t%4,%3", op);
11779 output_asm_insn ("ag\t%4,0(%1)", op);
11780 output_asm_insn ("ag\t%1,0(%4)", op);
11781 }
11782 else if (CONST_OK_FOR_Os (vcall_offset))
11783 {
11784 output_asm_insn ("lgfi\t%4,%3", op);
11785 output_asm_insn ("ag\t%4,0(%1)", op);
11786 output_asm_insn ("ag\t%1,0(%4)", op);
11787 }
11788 else
11789 {
11790 op[7] = gen_label_rtx ();
11791 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
11792 output_asm_insn ("ag\t%4,0(%1)", op);
11793 output_asm_insn ("ag\t%1,0(%4)", op);
11794 }
11795 }
11796
11797 /* Jump to target. */
11798 output_asm_insn ("jg\t%0", op);
11799
11800 /* Output literal pool if required. */
11801 if (op[5])
11802 {
11803 output_asm_insn (".align\t4", op);
11804 targetm.asm_out.internal_label (file, "L",
11805 CODE_LABEL_NUMBER (op[5]));
11806 }
11807 if (op[6])
11808 {
11809 targetm.asm_out.internal_label (file, "L",
11810 CODE_LABEL_NUMBER (op[6]));
11811 output_asm_insn (".long\t%2", op);
11812 }
11813 if (op[7])
11814 {
11815 targetm.asm_out.internal_label (file, "L",
11816 CODE_LABEL_NUMBER (op[7]));
11817 output_asm_insn (".long\t%3", op);
11818 }
11819 }
11820 else
11821 {
11822 /* Set up base pointer if required. */
11823 if (!vcall_offset
11824 || (!DISP_IN_RANGE (delta)
11825 && !CONST_OK_FOR_K (delta)
11826 && !CONST_OK_FOR_Os (delta))
11827 || (!DISP_IN_RANGE (delta)
11828 && !CONST_OK_FOR_K (vcall_offset)
11829 && !CONST_OK_FOR_Os (vcall_offset)))
11830 {
11831 op[5] = gen_label_rtx ();
11832 output_asm_insn ("basr\t%4,0", op);
11833 targetm.asm_out.internal_label (file, "L",
11834 CODE_LABEL_NUMBER (op[5]));
11835 }
11836
11837 /* Add DELTA to this pointer. */
11838 if (delta)
11839 {
11840 if (CONST_OK_FOR_J (delta))
11841 output_asm_insn ("la\t%1,%2(%1)", op);
11842 else if (DISP_IN_RANGE (delta))
11843 output_asm_insn ("lay\t%1,%2(%1)", op);
11844 else if (CONST_OK_FOR_K (delta))
11845 output_asm_insn ("ahi\t%1,%2", op);
11846 else if (CONST_OK_FOR_Os (delta))
11847 output_asm_insn ("afi\t%1,%2", op);
11848 else
11849 {
11850 op[6] = gen_label_rtx ();
11851 output_asm_insn ("a\t%1,%6-%5(%4)", op);
11852 }
11853 }
11854
11855 /* Perform vcall adjustment. */
11856 if (vcall_offset)
11857 {
11858 if (CONST_OK_FOR_J (vcall_offset))
11859 {
11860 output_asm_insn ("l\t%4,0(%1)", op);
11861 output_asm_insn ("a\t%1,%3(%4)", op);
11862 }
11863 else if (DISP_IN_RANGE (vcall_offset))
11864 {
11865 output_asm_insn ("l\t%4,0(%1)", op);
11866 output_asm_insn ("ay\t%1,%3(%4)", op);
11867 }
11868 else if (CONST_OK_FOR_K (vcall_offset))
11869 {
11870 output_asm_insn ("lhi\t%4,%3", op);
11871 output_asm_insn ("a\t%4,0(%1)", op);
11872 output_asm_insn ("a\t%1,0(%4)", op);
11873 }
11874 else if (CONST_OK_FOR_Os (vcall_offset))
11875 {
11876 output_asm_insn ("iilf\t%4,%3", op);
11877 output_asm_insn ("a\t%4,0(%1)", op);
11878 output_asm_insn ("a\t%1,0(%4)", op);
11879 }
11880 else
11881 {
11882 op[7] = gen_label_rtx ();
11883 output_asm_insn ("l\t%4,%7-%5(%4)", op);
11884 output_asm_insn ("a\t%4,0(%1)", op);
11885 output_asm_insn ("a\t%1,0(%4)", op);
11886 }
11887
11888 /* We had to clobber the base pointer register.
11889 Set up the base pointer again (with a different base). */
11890 op[5] = gen_label_rtx ();
11891 output_asm_insn ("basr\t%4,0", op);
11892 targetm.asm_out.internal_label (file, "L",
11893 CODE_LABEL_NUMBER (op[5]));
11894 }
11895
11896 /* Jump to target. */
11897 op[8] = gen_label_rtx ();
11898
11899 if (!flag_pic)
11900 output_asm_insn ("l\t%4,%8-%5(%4)", op);
11901 else if (!nonlocal)
11902 output_asm_insn ("a\t%4,%8-%5(%4)", op);
11903 /* We cannot call through .plt, since .plt requires %r12 loaded. */
11904 else if (flag_pic == 1)
11905 {
11906 output_asm_insn ("a\t%4,%8-%5(%4)", op);
11907 output_asm_insn ("l\t%4,%0(%4)", op);
11908 }
11909 else if (flag_pic == 2)
11910 {
11911 op[9] = gen_rtx_REG (Pmode, 0);
11912 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
11913 output_asm_insn ("a\t%4,%8-%5(%4)", op);
11914 output_asm_insn ("ar\t%4,%9", op);
11915 output_asm_insn ("l\t%4,0(%4)", op);
11916 }
11917
11918 output_asm_insn ("br\t%4", op);
11919
11920 /* Output literal pool. */
11921 output_asm_insn (".align\t4", op);
11922
11923 if (nonlocal && flag_pic == 2)
11924 output_asm_insn (".long\t%0", op);
11925 if (nonlocal)
11926 {
11927 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
11928 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
11929 }
11930
11931 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
11932 if (!flag_pic)
11933 output_asm_insn (".long\t%0", op);
11934 else
11935 output_asm_insn (".long\t%0-%5", op);
11936
11937 if (op[6])
11938 {
11939 targetm.asm_out.internal_label (file, "L",
11940 CODE_LABEL_NUMBER (op[6]));
11941 output_asm_insn (".long\t%2", op);
11942 }
11943 if (op[7])
11944 {
11945 targetm.asm_out.internal_label (file, "L",
11946 CODE_LABEL_NUMBER (op[7]));
11947 output_asm_insn (".long\t%3", op);
11948 }
11949 }
11950 final_end_function ();
11951 }
11952
11953 static bool
11954 s390_valid_pointer_mode (machine_mode mode)
11955 {
11956 return (mode == SImode || (TARGET_64BIT && mode == DImode));
11957 }
11958
11959 /* Checks whether the given CALL_EXPR would use a call-saved
11960 register. This is used to decide whether sibling call
11961 optimization could be performed on the respective function
11962 call. */
11963
11964 static bool
11965 s390_call_saved_register_used (tree call_expr)
11966 {
11967 CUMULATIVE_ARGS cum_v;
11968 cumulative_args_t cum;
11969 tree parameter;
11970 machine_mode mode;
11971 tree type;
11972 rtx parm_rtx;
11973 int reg, i;
11974
11975 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
11976 cum = pack_cumulative_args (&cum_v);
11977
11978 for (i = 0; i < call_expr_nargs (call_expr); i++)
11979 {
11980 parameter = CALL_EXPR_ARG (call_expr, i);
11981 gcc_assert (parameter);
11982
11983 /* For an undeclared variable passed as parameter we will get
11984 an ERROR_MARK node here. */
11985 if (TREE_CODE (parameter) == ERROR_MARK)
11986 return true;
11987
11988 type = TREE_TYPE (parameter);
11989 gcc_assert (type);
11990
11991 mode = TYPE_MODE (type);
11992 gcc_assert (mode);
11993
11994 /* We assume that in the target function all parameters are
11995 named. This only has an impact on vector argument register
11996 usage none of which is call-saved. */
11997 if (pass_by_reference (&cum_v, mode, type, true))
11998 {
11999 mode = Pmode;
12000 type = build_pointer_type (type);
12001 }
12002
12003 parm_rtx = s390_function_arg (cum, mode, type, true);
12004
12005 s390_function_arg_advance (cum, mode, type, true);
12006
12007 if (!parm_rtx)
12008 continue;
12009
12010 if (REG_P (parm_rtx))
12011 {
12012 for (reg = 0;
12013 reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
12014 reg++)
12015 if (!call_used_regs[reg + REGNO (parm_rtx)])
12016 return true;
12017 }
12018
12019 if (GET_CODE (parm_rtx) == PARALLEL)
12020 {
12021 int i;
12022
12023 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
12024 {
12025 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
12026
12027 gcc_assert (REG_P (r));
12028
12029 for (reg = 0;
12030 reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
12031 reg++)
12032 if (!call_used_regs[reg + REGNO (r)])
12033 return true;
12034 }
12035 }
12036
12037 }
12038 return false;
12039 }
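
/* Hypothetical example (not from the source): a call such as

     extern long f (long, long, long, long, long);
     return f (a, b, c, d, e);    // the fifth argument is passed in %r6

   makes this function return true, because %r6 is not in
   call_used_regs, and s390_function_ok_for_sibcall below therefore
   rejects the sibcall.  */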
12040
12041 /* Return true if the given call expression can be
12042 turned into a sibling call.
12043 DECL holds the declaration of the function to be called whereas
12044 EXP is the call expression itself. */
12045
12046 static bool
12047 s390_function_ok_for_sibcall (tree decl, tree exp)
12048 {
12049 /* The TPF epilogue uses register 1. */
12050 if (TARGET_TPF_PROFILING)
12051 return false;
12052
12053 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
12054 which would have to be restored before the sibcall. */
12055 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
12056 return false;
12057
12058 /* Register 6 on s390 is available as an argument register but is
12059 call-saved. This makes functions needing this register for arguments
12060 not suitable for sibcalls. */
12061 return !s390_call_saved_register_used (exp);
12062 }
12063
12064 /* Return the fixed registers used for condition codes. */
12065
12066 static bool
12067 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
12068 {
12069 *p1 = CC_REGNUM;
12070 *p2 = INVALID_REGNUM;
12071
12072 return true;
12073 }
12074
12075 /* This function is used by the call expanders of the machine description.
12076 It emits the call insn itself together with the necessary operations
12077 to adjust the target address and returns the emitted insn.
12078 ADDR_LOCATION is the target address rtx
12079 TLS_CALL the location of the thread-local symbol
12080 RESULT_REG the register where the result of the call should be stored
12081 RETADDR_REG the register where the return address should be stored.
12082 If this parameter is NULL_RTX, the call is considered
12083 to be a sibling call. */
12084
12085 rtx_insn *
12086 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
12087 rtx retaddr_reg)
12088 {
12089 bool plt_call = false;
12090 rtx_insn *insn;
12091 rtx call;
12092 rtx clobber;
12093 rtvec vec;
12094
12095 /* Direct function calls need special treatment. */
12096 if (GET_CODE (addr_location) == SYMBOL_REF)
12097 {
12098 /* When calling a global routine in PIC mode, we must
12099 replace the symbol itself with the PLT stub. */
12100 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
12101 {
12102 if (retaddr_reg != NULL_RTX)
12103 {
12104 addr_location = gen_rtx_UNSPEC (Pmode,
12105 gen_rtvec (1, addr_location),
12106 UNSPEC_PLT);
12107 addr_location = gen_rtx_CONST (Pmode, addr_location);
12108 plt_call = true;
12109 }
12110 else
12111 /* For -fpic code the PLT entries might use r12 which is
12112 call-saved. Therefore we cannot do a sibcall when
12113 calling directly using a symbol ref. When reaching
12114 this point we decided (in s390_function_ok_for_sibcall)
12115 to do a sibcall for a function pointer but one of the
12116 optimizers was able to get rid of the function pointer
12117 by propagating the symbol ref into the call. This
12118 optimization is illegal for S/390 so we turn the direct
12119 call into an indirect call again. */
12120 addr_location = force_reg (Pmode, addr_location);
12121 }
12122
12123 /* Unless we can use the bras(l) insn, force the
12124 routine address into a register. */
12125 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
12126 {
12127 if (flag_pic)
12128 addr_location = legitimize_pic_address (addr_location, 0);
12129 else
12130 addr_location = force_reg (Pmode, addr_location);
12131 }
12132 }
12133
12134 /* If it is already an indirect call or the code above moved the
12135 SYMBOL_REF to somewhere else, make sure the address can be found in
12136 register 1. */
12137 if (retaddr_reg == NULL_RTX
12138 && GET_CODE (addr_location) != SYMBOL_REF
12139 && !plt_call)
12140 {
12141 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
12142 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
12143 }
12144
12145 addr_location = gen_rtx_MEM (QImode, addr_location);
12146 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
12147
12148 if (result_reg != NULL_RTX)
12149 call = gen_rtx_SET (result_reg, call);
12150
12151 if (retaddr_reg != NULL_RTX)
12152 {
12153 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
12154
12155 if (tls_call != NULL_RTX)
12156 vec = gen_rtvec (3, call, clobber,
12157 gen_rtx_USE (VOIDmode, tls_call));
12158 else
12159 vec = gen_rtvec (2, call, clobber);
12160
12161 call = gen_rtx_PARALLEL (VOIDmode, vec);
12162 }
12163
12164 insn = emit_call_insn (call);
12165
12166 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
12167 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
12168 {
12169 /* s390_function_ok_for_sibcall should
12170 have denied sibcalls in this case. */
12171 gcc_assert (retaddr_reg != NULL_RTX);
12172 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
12173 }
12174 return insn;
12175 }
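
/* Shape of the emitted insn for an ordinary (non-sibling) call that
   returns a value (an informal sketch):

     (parallel [(set RESULT_REG
                     (call (mem:QI ADDR_LOCATION) (const_int 0)))
                (clobber RETADDR_REG)])

   with an additional (use TLS_CALL) element appended for TLS calls.  */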
12176
12177 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
12178
12179 static void
12180 s390_conditional_register_usage (void)
12181 {
12182 int i;
12183
12184 if (flag_pic)
12185 {
12186 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12187 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12188 }
12189 if (TARGET_CPU_ZARCH)
12190 {
12191 fixed_regs[BASE_REGNUM] = 0;
12192 call_used_regs[BASE_REGNUM] = 0;
12193 fixed_regs[RETURN_REGNUM] = 0;
12194 call_used_regs[RETURN_REGNUM] = 0;
12195 }
12196 if (TARGET_64BIT)
12197 {
12198 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
12199 call_used_regs[i] = call_really_used_regs[i] = 0;
12200 }
12201 else
12202 {
12203 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
12204 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
12205 }
12206
12207 if (TARGET_SOFT_FLOAT)
12208 {
12209 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
12210 call_used_regs[i] = fixed_regs[i] = 1;
12211 }
12212
12213 /* Disable v16 - v31 for non-vector target. */
12214 if (!TARGET_VX)
12215 {
12216 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
12217 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
12218 }
12219 }
12220
12221 /* Function corresponding to the eh_return expander. */
12222
12223 static GTY(()) rtx s390_tpf_eh_return_symbol;
12224 void
12225 s390_emit_tpf_eh_return (rtx target)
12226 {
12227 rtx_insn *insn;
12228 rtx reg, orig_ra;
12229
12230 if (!s390_tpf_eh_return_symbol)
12231 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
12232
12233 reg = gen_rtx_REG (Pmode, 2);
12234 orig_ra = gen_rtx_REG (Pmode, 3);
12235
12236 emit_move_insn (reg, target);
12237 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
12238 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
12239 gen_rtx_REG (Pmode, RETURN_REGNUM));
12240 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
12241 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
12242
12243 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
12244 }
12245
12246 /* Rework the prologue/epilogue to avoid saving/restoring
12247 registers unnecessarily. */
12248
12249 static void
12250 s390_optimize_prologue (void)
12251 {
12252 rtx_insn *insn, *new_insn, *next_insn;
12253
12254 /* Do a final recompute of the frame-related data. */
12255 s390_optimize_register_info ();
12256
12257 /* If all special registers are in fact used, there's nothing we
12258 can do, so no point in walking the insn list. */
12259
12260 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
12261 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
12262 && (TARGET_CPU_ZARCH
12263 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
12264 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
12265 return;
12266
12267 /* Search for prologue/epilogue insns and replace them. */
12268
12269 for (insn = get_insns (); insn; insn = next_insn)
12270 {
12271 int first, last, off;
12272 rtx set, base, offset;
12273 rtx pat;
12274
12275 next_insn = NEXT_INSN (insn);
12276
12277 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
12278 continue;
12279
12280 pat = PATTERN (insn);
12281
12282 /* Remove ldgr/lgdr instructions used for saving and restoring
12283 GPRs if possible. */
12284 if (TARGET_Z10
12285 && GET_CODE (pat) == SET
12286 && GET_MODE (SET_SRC (pat)) == DImode
12287 && REG_P (SET_SRC (pat))
12288 && REG_P (SET_DEST (pat)))
12289 {
12290 int src_regno = REGNO (SET_SRC (pat));
12291 int dest_regno = REGNO (SET_DEST (pat));
12292 int gpr_regno;
12293 int fpr_regno;
12294
12295 if (!((GENERAL_REGNO_P (src_regno) && FP_REGNO_P (dest_regno))
12296 || (FP_REGNO_P (src_regno) && GENERAL_REGNO_P (dest_regno))))
12297 continue;
12298
12299 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
12300 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
12301
12302 /* GPR must be call-saved, FPR must be call-clobbered. */
12303 if (!call_really_used_regs[fpr_regno]
12304 || call_really_used_regs[gpr_regno])
12305 continue;
12306
12307 /* It must not happen that what we once saved in an FPR now
12308 needs a stack slot. */
12309 gcc_assert (cfun_gpr_save_slot (gpr_regno) != -1);
12310
12311 if (cfun_gpr_save_slot (gpr_regno) == 0)
12312 {
12313 remove_insn (insn);
12314 continue;
12315 }
12316 }
12317
12318 if (GET_CODE (pat) == PARALLEL
12319 && store_multiple_operation (pat, VOIDmode))
12320 {
12321 set = XVECEXP (pat, 0, 0);
12322 first = REGNO (SET_SRC (set));
12323 last = first + XVECLEN (pat, 0) - 1;
12324 offset = const0_rtx;
12325 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
12326 off = INTVAL (offset);
12327
12328 if (GET_CODE (base) != REG || off < 0)
12329 continue;
12330 if (cfun_frame_layout.first_save_gpr != -1
12331 && (cfun_frame_layout.first_save_gpr < first
12332 || cfun_frame_layout.last_save_gpr > last))
12333 continue;
12334 if (REGNO (base) != STACK_POINTER_REGNUM
12335 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12336 continue;
12337 if (first > BASE_REGNUM || last < BASE_REGNUM)
12338 continue;
12339
12340 if (cfun_frame_layout.first_save_gpr != -1)
12341 {
12342 rtx s_pat = save_gprs (base,
12343 off + (cfun_frame_layout.first_save_gpr
12344 - first) * UNITS_PER_LONG,
12345 cfun_frame_layout.first_save_gpr,
12346 cfun_frame_layout.last_save_gpr);
12347 new_insn = emit_insn_before (s_pat, insn);
12348 INSN_ADDRESSES_NEW (new_insn, -1);
12349 }
12350
12351 remove_insn (insn);
12352 continue;
12353 }
12354
12355 if (cfun_frame_layout.first_save_gpr == -1
12356 && GET_CODE (pat) == SET
12357 && GENERAL_REG_P (SET_SRC (pat))
12358 && GET_CODE (SET_DEST (pat)) == MEM)
12359 {
12360 set = pat;
12361 first = REGNO (SET_SRC (set));
12362 offset = const0_rtx;
12363 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
12364 off = INTVAL (offset);
12365
12366 if (GET_CODE (base) != REG || off < 0)
12367 continue;
12368 if (REGNO (base) != STACK_POINTER_REGNUM
12369 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12370 continue;
12371
12372 remove_insn (insn);
12373 continue;
12374 }
12375
12376 if (GET_CODE (pat) == PARALLEL
12377 && load_multiple_operation (pat, VOIDmode))
12378 {
12379 set = XVECEXP (pat, 0, 0);
12380 first = REGNO (SET_DEST (set));
12381 last = first + XVECLEN (pat, 0) - 1;
12382 offset = const0_rtx;
12383 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
12384 off = INTVAL (offset);
12385
12386 if (GET_CODE (base) != REG || off < 0)
12387 continue;
12388
12389 if (cfun_frame_layout.first_restore_gpr != -1
12390 && (cfun_frame_layout.first_restore_gpr < first
12391 || cfun_frame_layout.last_restore_gpr > last))
12392 continue;
12393 if (REGNO (base) != STACK_POINTER_REGNUM
12394 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12395 continue;
12396 if (first > BASE_REGNUM || last < BASE_REGNUM)
12397 continue;
12398
12399 if (cfun_frame_layout.first_restore_gpr != -1)
12400 {
12401 rtx rpat = restore_gprs (base,
12402 off + (cfun_frame_layout.first_restore_gpr
12403 - first) * UNITS_PER_LONG,
12404 cfun_frame_layout.first_restore_gpr,
12405 cfun_frame_layout.last_restore_gpr);
12406
12407 /* Remove REG_CFA_RESTOREs for registers that we no
12408 longer need to save. */
12409 REG_NOTES (rpat) = REG_NOTES (insn);
12410 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
12411 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
12412 && ((int) REGNO (XEXP (*ptr, 0))
12413 < cfun_frame_layout.first_restore_gpr))
12414 *ptr = XEXP (*ptr, 1);
12415 else
12416 ptr = &XEXP (*ptr, 1);
12417 new_insn = emit_insn_before (rpat, insn);
12418 RTX_FRAME_RELATED_P (new_insn) = 1;
12419 INSN_ADDRESSES_NEW (new_insn, -1);
12420 }
12421
12422 remove_insn (insn);
12423 continue;
12424 }
12425
12426 if (cfun_frame_layout.first_restore_gpr == -1
12427 && GET_CODE (pat) == SET
12428 && GENERAL_REG_P (SET_DEST (pat))
12429 && GET_CODE (SET_SRC (pat)) == MEM)
12430 {
12431 set = pat;
12432 first = REGNO (SET_DEST (set));
12433 offset = const0_rtx;
12434 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
12435 off = INTVAL (offset);
12436
12437 if (GET_CODE (base) != REG || off < 0)
12438 continue;
12439
12440 if (REGNO (base) != STACK_POINTER_REGNUM
12441 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12442 continue;
12443
12444 remove_insn (insn);
12445 continue;
12446 }
12447 }
12448 }
12449
12450 /* On z10 and later the dynamic branch prediction must see the
12451 backward jump within a certain window. If it does not, it falls back
12452 to the static prediction. This function rearranges the loop backward
12453 branch in a way which makes the static prediction always correct.
12454 The function returns true if it added an instruction. */
12455 static bool
12456 s390_fix_long_loop_prediction (rtx_insn *insn)
12457 {
12458 rtx set = single_set (insn);
12459 rtx code_label, label_ref, new_label;
12460 rtx_insn *uncond_jump;
12461 rtx_insn *cur_insn;
12462 rtx tmp;
12463 int distance;
12464
12465 /* This will exclude branch on count and branch on index patterns
12466 since these are correctly statically predicted. */
12467 if (!set
12468 || SET_DEST (set) != pc_rtx
12469 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
12470 return false;
12471
12472 /* Skip conditional returns. */
12473 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
12474 && XEXP (SET_SRC (set), 2) == pc_rtx)
12475 return false;
12476
12477 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
12478 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
12479
12480 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
12481
12482 code_label = XEXP (label_ref, 0);
12483
12484 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
12485 || INSN_ADDRESSES (INSN_UID (insn)) == -1
12486 || (INSN_ADDRESSES (INSN_UID (insn))
12487 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
12488 return false;
12489
12490 for (distance = 0, cur_insn = PREV_INSN (insn);
12491 distance < PREDICT_DISTANCE - 6;
12492 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
12493 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
12494 return false;
12495
12496 new_label = gen_label_rtx ();
12497 uncond_jump = emit_jump_insn_after (
12498 gen_rtx_SET (pc_rtx,
12499 gen_rtx_LABEL_REF (VOIDmode, code_label)),
12500 insn);
12501 emit_label_after (new_label, uncond_jump);
12502
12503 tmp = XEXP (SET_SRC (set), 1);
12504 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
12505 XEXP (SET_SRC (set), 2) = tmp;
12506 INSN_CODE (insn) = -1;
12507
12508 XEXP (label_ref, 0) = new_label;
12509 JUMP_LABEL (insn) = new_label;
12510 JUMP_LABEL (uncond_jump) = code_label;
12511
12512 return true;
12513 }
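
/* Informal sketch of the rewrite performed above (the branch sense is
   effectively inverted by swapping the if_then_else arms):

     before:                          after:
       ...loop body...                  ...loop body...
       if (cond) goto loop_start;       if (!cond) goto new_label;
                                        goto loop_start;  // unconditional
                                      new_label:

   The backward branch becomes unconditional and is therefore predicted
   correctly even when loop_start is farther away than the dynamic
   prediction window.  */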
12514
12515 /* Returns 1 if INSN reads the value of REG for purposes not related
12516 to addressing of memory, and 0 otherwise. */
12517 static int
12518 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
12519 {
12520 return reg_referenced_p (reg, PATTERN (insn))
12521 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
12522 }
12523
12524 /* Starting from INSN find_cond_jump looks downwards in the insn
12525 stream for a single jump insn which is the last user of the
12526 condition code set in INSN. */
12527 static rtx_insn *
12528 find_cond_jump (rtx_insn *insn)
12529 {
12530 for (; insn; insn = NEXT_INSN (insn))
12531 {
12532 rtx ite, cc;
12533
12534 if (LABEL_P (insn))
12535 break;
12536
12537 if (!JUMP_P (insn))
12538 {
12539 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
12540 break;
12541 continue;
12542 }
12543
12544 /* This will be triggered by a return. */
12545 if (GET_CODE (PATTERN (insn)) != SET)
12546 break;
12547
12548 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
12549 ite = SET_SRC (PATTERN (insn));
12550
12551 if (GET_CODE (ite) != IF_THEN_ELSE)
12552 break;
12553
12554 cc = XEXP (XEXP (ite, 0), 0);
12555 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
12556 break;
12557
12558 if (find_reg_note (insn, REG_DEAD, cc))
12559 return insn;
12560 break;
12561 }
12562
12563 return NULL;
12564 }
12565
12566 /* Swap the condition in COND and the operands in OP0 and OP1 so that
12567 the semantics do not change. If NULL_RTX is passed as COND, the
12568 function tries to find the conditional jump starting with INSN. */
12569 static void
12570 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
12571 {
12572 rtx tmp = *op0;
12573
12574 if (cond == NULL_RTX)
12575 {
12576 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
12577 rtx set = jump ? single_set (jump) : NULL_RTX;
12578
12579 if (set == NULL_RTX)
12580 return;
12581
12582 cond = XEXP (SET_SRC (set), 0);
12583 }
12584
12585 *op0 = *op1;
12586 *op1 = tmp;
12587 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
12588 }
12589
12590 /* On z10, instructions of the compare-and-branch family have the
12591 property of accessing the register occurring as the second operand with
12592 its bits complemented. If such a compare is grouped with a second
12593 instruction that accesses the same register non-complemented, and
12594 if that register's value is delivered via a bypass, then the
12595 pipeline recycles, thereby causing significant performance decline.
12596 This function locates such situations and exchanges the two
12597 operands of the compare. The function returns true whenever it
12598 added an insn. */
12599 static bool
12600 s390_z10_optimize_cmp (rtx_insn *insn)
12601 {
12602 rtx_insn *prev_insn, *next_insn;
12603 bool insn_added_p = false;
12604 rtx cond, *op0, *op1;
12605
12606 if (GET_CODE (PATTERN (insn)) == PARALLEL)
12607 {
12608 /* Handle compare and branch and branch on count
12609 instructions. */
12610 rtx pattern = single_set (insn);
12611
12612 if (!pattern
12613 || SET_DEST (pattern) != pc_rtx
12614 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
12615 return false;
12616
12617 cond = XEXP (SET_SRC (pattern), 0);
12618 op0 = &XEXP (cond, 0);
12619 op1 = &XEXP (cond, 1);
12620 }
12621 else if (GET_CODE (PATTERN (insn)) == SET)
12622 {
12623 rtx src, dest;
12624
12625 /* Handle normal compare instructions. */
12626 src = SET_SRC (PATTERN (insn));
12627 dest = SET_DEST (PATTERN (insn));
12628
12629 if (!REG_P (dest)
12630 || !CC_REGNO_P (REGNO (dest))
12631 || GET_CODE (src) != COMPARE)
12632 return false;
12633
12634 /* s390_swap_cmp will try to find the conditional
12635 jump when passing NULL_RTX as condition. */
12636 cond = NULL_RTX;
12637 op0 = &XEXP (src, 0);
12638 op1 = &XEXP (src, 1);
12639 }
12640 else
12641 return false;
12642
12643 if (!REG_P (*op0) || !REG_P (*op1))
12644 return false;
12645
12646 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
12647 return false;
12648
12649 /* Swap the COMPARE arguments and its mask if there is a
12650 conflicting access in the previous insn. */
12651 prev_insn = prev_active_insn (insn);
12652 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
12653 && reg_referenced_p (*op1, PATTERN (prev_insn)))
12654 s390_swap_cmp (cond, op0, op1, insn);
12655
12656 /* Check if there is a conflict with the next insn. If there
12657 was no conflict with the previous insn, then swap the
12658 COMPARE arguments and its mask. If we already swapped
12659 the operands, or if swapping them would cause a conflict
12660 with the previous insn, issue a NOP after the COMPARE in
12661 order to separate the two instructions. */
12662 next_insn = next_active_insn (insn);
12663 if (next_insn != NULL_RTX && INSN_P (next_insn)
12664 && s390_non_addr_reg_read_p (*op1, next_insn))
12665 {
12666 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
12667 && s390_non_addr_reg_read_p (*op0, prev_insn))
12668 {
12669 if (REGNO (*op1) == 0)
12670 emit_insn_after (gen_nop1 (), insn);
12671 else
12672 emit_insn_after (gen_nop (), insn);
12673 insn_added_p = true;
12674 }
12675 else
12676 s390_swap_cmp (cond, op0, op1, insn);
12677 }
12678 return insn_added_p;
12679 }
12680
12681 /* Perform machine-dependent processing. */
12682
12683 static void
12684 s390_reorg (void)
12685 {
12686 bool pool_overflow = false;
12687 int hw_before, hw_after;
12688
12689 /* Make sure all splits have been performed; splits after
12690 machine_dependent_reorg might confuse insn length counts. */
12691 split_all_insns_noflow ();
12692
12693 /* Install the main literal pool and the associated base
12694 register load insns.
12695
12696 In addition, there are two problematic situations we need
12697 to correct:
12698
12699 - the literal pool might be > 4096 bytes in size, so that
12700 some of its elements cannot be directly accessed
12701
12702 - a branch target might be > 64K away from the branch, so that
12703 it is not possible to use a PC-relative instruction.
12704
12705 To fix those, we split the single literal pool into multiple
12706 pool chunks, reloading the pool base register at various
12707 points throughout the function to ensure it always points to
12708 the pool chunk the following code expects, and / or replace
12709 PC-relative branches by absolute branches.
12710
12711 However, the two problems are interdependent: splitting the
12712 literal pool can move a branch further away from its target,
12713 causing the 64K limit to be exceeded, and on the other hand,
12714 replacing a PC-relative branch by an absolute branch means
12715 we need to put the branch target address into the literal
12716 pool, possibly causing it to overflow.
12717
12718 So, we loop trying to fix up both problems until we manage
12719 to satisfy both conditions at the same time. Note that the
12720 loop is guaranteed to terminate as every pass of the loop
12721 strictly decreases the total number of PC-relative branches
12722 in the function. (This is not completely true as there
12723 might be branch-over-pool insns introduced by chunkify_start.
12724 Those never need to be split however.) */
12725
12726 for (;;)
12727 {
12728 struct constant_pool *pool = NULL;
12729
12730 /* Collect the literal pool. */
12731 if (!pool_overflow)
12732 {
12733 pool = s390_mainpool_start ();
12734 if (!pool)
12735 pool_overflow = true;
12736 }
12737
12738 /* If literal pool overflowed, start to chunkify it. */
12739 if (pool_overflow)
12740 pool = s390_chunkify_start ();
12741
12742 /* Split out-of-range branches. If this has created new
12743 literal pool entries, cancel current chunk list and
12744 recompute it. zSeries machines have large branch
12745 instructions, so we never need to split a branch. */
12746 if (!TARGET_CPU_ZARCH && s390_split_branches ())
12747 {
12748 if (pool_overflow)
12749 s390_chunkify_cancel (pool);
12750 else
12751 s390_mainpool_cancel (pool);
12752
12753 continue;
12754 }
12755
12756 /* If we made it up to here, both conditions are satisfied.
12757 Finish up literal pool related changes. */
12758 if (pool_overflow)
12759 s390_chunkify_finish (pool);
12760 else
12761 s390_mainpool_finish (pool);
12762
12763 /* We're done splitting branches. */
12764 cfun->machine->split_branches_pending_p = false;
12765 break;
12766 }
12767
12768 /* Generate out-of-pool execute target insns. */
12769 if (TARGET_CPU_ZARCH)
12770 {
12771 rtx_insn *insn, *target;
12772 rtx label;
12773
12774 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
12775 {
12776 label = s390_execute_label (insn);
12777 if (!label)
12778 continue;
12779
12780 gcc_assert (label != const0_rtx);
12781
12782 target = emit_label (XEXP (label, 0));
12783 INSN_ADDRESSES_NEW (target, -1);
12784
12785 target = emit_insn (s390_execute_target (insn));
12786 INSN_ADDRESSES_NEW (target, -1);
12787 }
12788 }
12789
12790 /* Try to optimize prologue and epilogue further. */
12791 s390_optimize_prologue ();
12792
12793 /* Walk over the insns and do some >=z10 specific changes. */
12794 if (s390_tune == PROCESSOR_2097_Z10
12795 || s390_tune == PROCESSOR_2817_Z196
12796 || s390_tune == PROCESSOR_2827_ZEC12
12797 || s390_tune == PROCESSOR_2964_Z13)
12798 {
12799 rtx_insn *insn;
12800 bool insn_added_p = false;
12801
12802 /* The insn lengths and addresses have to be up to date for the
12803 following manipulations. */
12804 shorten_branches (get_insns ());
12805
12806 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
12807 {
12808 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
12809 continue;
12810
12811 if (JUMP_P (insn))
12812 insn_added_p |= s390_fix_long_loop_prediction (insn);
12813
12814 if ((GET_CODE (PATTERN (insn)) == PARALLEL
12815 || GET_CODE (PATTERN (insn)) == SET)
12816 && s390_tune == PROCESSOR_2097_Z10)
12817 insn_added_p |= s390_z10_optimize_cmp (insn);
12818 }
12819
12820 /* Adjust branches if we added new instructions. */
12821 if (insn_added_p)
12822 shorten_branches (get_insns ());
12823 }
12824
12825 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
12826 if (hw_after > 0)
12827 {
12828 rtx_insn *insn;
12829
12830 /* Insert NOPs for hotpatching. */
12831 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
12832 /* Emit NOPs
12833 1. inside the area covered by debug information to allow setting
12834 breakpoints at the NOPs,
12835 2. before any insn which results in an asm instruction,
12836 3. before in-function labels to avoid jumping to the NOPs, for
12837 example as part of a loop,
12838 4. before any barrier in case the function is completely empty
12839 (__builtin_unreachable ()) and has neither internal labels nor
12840 active insns.
12841 */
12842 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
12843 break;
12844 /* Output a series of NOPs before the first active insn. */
12845 while (insn && hw_after > 0)
12846 {
12847 if (hw_after >= 3 && TARGET_CPU_ZARCH)
12848 {
12849 emit_insn_before (gen_nop_6_byte (), insn);
12850 hw_after -= 3;
12851 }
12852 else if (hw_after >= 2)
12853 {
12854 emit_insn_before (gen_nop_4_byte (), insn);
12855 hw_after -= 2;
12856 }
12857 else
12858 {
12859 emit_insn_before (gen_nop_2_byte (), insn);
12860 hw_after -= 1;
12861 }
12862 }
12863 }
12864 }
12865
12866 /* Return true if INSN is a fp load insn writing register REGNO. */
12867 static inline bool
12868 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
12869 {
12870 rtx set;
12871 enum attr_type flag = s390_safe_attr_type (insn);
12872
12873 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
12874 return false;
12875
12876 set = single_set (insn);
12877
12878 if (set == NULL_RTX)
12879 return false;
12880
12881 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
12882 return false;
12883
12884 if (REGNO (SET_DEST (set)) != regno)
12885 return false;
12886
12887 return true;
12888 }
12889
12890 /* This value describes the distance to be avoided between an
12891 arithmetic fp instruction and an fp load writing the same register.
12892 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 is
12893 fine but the exact value has to be avoided. Otherwise the FP
12894 pipeline will throw an exception causing a major penalty. */
12895 #define Z10_EARLYLOAD_DISTANCE 7
12896
12897 /* Rearrange the ready list in order to avoid the situation described
12898 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
12899 moved to the very end of the ready list. */
12900 static void
12901 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
12902 {
12903 unsigned int regno;
12904 int nready = *nready_p;
12905 rtx_insn *tmp;
12906 int i;
12907 rtx_insn *insn;
12908 rtx set;
12909 enum attr_type flag;
12910 int distance;
12911
12912 /* Skip DISTANCE - 1 active insns. */
12913 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
12914 distance > 0 && insn != NULL_RTX;
12915 distance--, insn = prev_active_insn (insn))
12916 if (CALL_P (insn) || JUMP_P (insn))
12917 return;
12918
12919 if (insn == NULL_RTX)
12920 return;
12921
12922 set = single_set (insn);
12923
12924 if (set == NULL_RTX || !REG_P (SET_DEST (set))
12925 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
12926 return;
12927
12928 flag = s390_safe_attr_type (insn);
12929
12930 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
12931 return;
12932
12933 regno = REGNO (SET_DEST (set));
12934 i = nready - 1;
12935
12936 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
12937 i--;
12938
12939 if (!i)
12940 return;
12941
12942 tmp = ready[i];
12943 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
12944 ready[0] = tmp;
12945 }
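
/* Informal example: when the insn scheduled Z10_EARLYLOAD_DISTANCE - 1
   active insns ago is an fp arithmetic instruction writing, say, %f0,
   any load of %f0 still sitting in the ready list is moved to index 0,
   i.e. to the lowest-priority end of the list, so that it is not issued
   at the problematic distance.  */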
12946
12947
12948 /* The s390_sched_state variable tracks the state of the current or
12949 the last instruction group.
12950
12951 0,1,2 number of instructions scheduled in the current group
12952 3 the last group is complete - normal insns
12953 4 the last group was a cracked/expanded insn */
12954
12955 static int s390_sched_state;
12956
12957 #define S390_OOO_SCHED_STATE_NORMAL 3
12958 #define S390_OOO_SCHED_STATE_CRACKED 4
12959
12960 #define S390_OOO_SCHED_ATTR_MASK_CRACKED 0x1
12961 #define S390_OOO_SCHED_ATTR_MASK_EXPANDED 0x2
12962 #define S390_OOO_SCHED_ATTR_MASK_ENDGROUP 0x4
12963 #define S390_OOO_SCHED_ATTR_MASK_GROUPALONE 0x8
12964
12965 static unsigned int
12966 s390_get_sched_attrmask (rtx_insn *insn)
12967 {
12968 unsigned int mask = 0;
12969
12970 if (get_attr_ooo_cracked (insn))
12971 mask |= S390_OOO_SCHED_ATTR_MASK_CRACKED;
12972 if (get_attr_ooo_expanded (insn))
12973 mask |= S390_OOO_SCHED_ATTR_MASK_EXPANDED;
12974 if (get_attr_ooo_endgroup (insn))
12975 mask |= S390_OOO_SCHED_ATTR_MASK_ENDGROUP;
12976 if (get_attr_ooo_groupalone (insn))
12977 mask |= S390_OOO_SCHED_ATTR_MASK_GROUPALONE;
12978 return mask;
12979 }
12980
12981 /* Return the scheduling score for INSN. The higher the score the
12982 better. The score is calculated from the OOO scheduling attributes
12983 of INSN and the scheduling state s390_sched_state. */
12984 static int
12985 s390_sched_score (rtx_insn *insn)
12986 {
12987 unsigned int mask = s390_get_sched_attrmask (insn);
12988 int score = 0;
12989
12990 switch (s390_sched_state)
12991 {
12992 case 0:
12993 /* Try to put insns into the first slot which would otherwise
12994 break a group. */
12995 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
12996 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
12997 score += 5;
12998 if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
12999 score += 10;
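      /* Falls through to the case 1 scoring below.  */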
13000 case 1:
13001 /* Prefer not cracked insns while trying to put together a
13002 group. */
13003 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
13004 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
13005 && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
13006 score += 10;
13007 if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) == 0)
13008 score += 5;
13009 break;
13010 case 2:
13011 /* Prefer not cracked insns while trying to put together a
13012 group. */
13013 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
13014 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
13015 && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
13016 score += 10;
13017 /* Prefer endgroup insns in the last slot. */
13018 if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0)
13019 score += 10;
13020 break;
13021 case S390_OOO_SCHED_STATE_NORMAL:
13022 /* Prefer not cracked insns if the last was not cracked. */
13023 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
13024 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0)
13025 score += 5;
13026 if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
13027 score += 10;
13028 break;
13029 case S390_OOO_SCHED_STATE_CRACKED:
13030 /* Try to keep cracked insns together to prevent them from
13031 interrupting groups. */
13032 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
13033 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
13034 score += 5;
13035 break;
13036 }
13037 return score;
13038 }
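
/* Illustration (informal): in state 2 an insn that only carries the
   ooo_endgroup attribute scores 10 + 10 = 20, while a cracked insn
   scores 0, so the third slot of a group is preferably filled with an
   insn that closes the group.  */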
13039
13040 /* This function is called via hook TARGET_SCHED_REORDER before
13041 issuing one insn from list READY which contains *NREADYP entries.
13042 For target z10 it reorders load instructions to avoid early load
13043 conflicts in the floating point pipeline. */
13044 static int
13045 s390_sched_reorder (FILE *file, int verbose,
13046 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
13047 {
13048 if (s390_tune == PROCESSOR_2097_Z10)
13049 if (reload_completed && *nreadyp > 1)
13050 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
13051
13052 if ((s390_tune == PROCESSOR_2827_ZEC12
13053 || s390_tune == PROCESSOR_2964_Z13)
13054 && reload_completed
13055 && *nreadyp > 1)
13056 {
13057 int i;
13058 int last_index = *nreadyp - 1;
13059 int max_index = -1;
13060 int max_score = -1;
13061 rtx_insn *tmp;
13062
13063 /* Just move the insn with the highest score to the top (the
13064 end) of the list. A full sort is not needed since a conflict
13065 in the hazard recognition cannot happen. So the top insn in
13066 the ready list will always be taken. */
13067 for (i = last_index; i >= 0; i--)
13068 {
13069 int score;
13070
13071 if (recog_memoized (ready[i]) < 0)
13072 continue;
13073
13074 score = s390_sched_score (ready[i]);
13075 if (score > max_score)
13076 {
13077 max_score = score;
13078 max_index = i;
13079 }
13080 }
13081
13082 if (max_index != -1)
13083 {
13084 if (max_index != last_index)
13085 {
13086 tmp = ready[max_index];
13087 ready[max_index] = ready[last_index];
13088 ready[last_index] = tmp;
13089
13090 if (verbose > 5)
13091 fprintf (file,
13092 "move insn %d to the top of list\n",
13093 INSN_UID (ready[last_index]));
13094 }
13095 else if (verbose > 5)
13096 fprintf (file,
13097 "best insn %d already on top\n",
13098 INSN_UID (ready[last_index]));
13099 }
13100
13101 if (verbose > 5)
13102 {
13103 fprintf (file, "ready list ooo attributes - sched state: %d\n",
13104 s390_sched_state);
13105
13106 for (i = last_index; i >= 0; i--)
13107 {
13108 if (recog_memoized (ready[i]) < 0)
13109 continue;
13110 fprintf (file, "insn %d score: %d: ", INSN_UID (ready[i]),
13111 s390_sched_score (ready[i]));
13112 #define PRINT_OOO_ATTR(ATTR) fprintf (file, "%s ", get_attr_##ATTR (ready[i]) ? #ATTR : "!" #ATTR);
13113 PRINT_OOO_ATTR (ooo_cracked);
13114 PRINT_OOO_ATTR (ooo_expanded);
13115 PRINT_OOO_ATTR (ooo_endgroup);
13116 PRINT_OOO_ATTR (ooo_groupalone);
13117 #undef PRINT_OOO_ATTR
13118 fprintf (file, "\n");
13119 }
13120 }
13121 }
13122
13123 return s390_issue_rate ();
13124 }
13125
13126
13127 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
13128 the scheduler has issued INSN. It stores the last issued insn into
13129 last_scheduled_insn in order to make it available for
13130 s390_sched_reorder. */
13131 static int
13132 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
13133 {
13134 last_scheduled_insn = insn;
13135
13136 if ((s390_tune == PROCESSOR_2827_ZEC12
13137 || s390_tune == PROCESSOR_2964_Z13)
13138 && reload_completed
13139 && recog_memoized (insn) >= 0)
13140 {
13141 unsigned int mask = s390_get_sched_attrmask (insn);
13142
13143 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
13144 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
13145 s390_sched_state = S390_OOO_SCHED_STATE_CRACKED;
13146 else if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0
13147 || (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
13148 s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
13149 else
13150 {
13151 /* Only normal insns are left (mask == 0). */
13152 switch (s390_sched_state)
13153 {
13154 case 0:
13155 case 1:
13156 case 2:
13157 case S390_OOO_SCHED_STATE_NORMAL:
13158 if (s390_sched_state == S390_OOO_SCHED_STATE_NORMAL)
13159 s390_sched_state = 1;
13160 else
13161 s390_sched_state++;
13162
13163 break;
13164 case S390_OOO_SCHED_STATE_CRACKED:
13165 s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
13166 break;
13167 }
13168 }
13169 if (verbose > 5)
13170 {
13171 fprintf (file, "insn %d: ", INSN_UID (insn));
13172 #define PRINT_OOO_ATTR(ATTR) \
13173 fprintf (file, "%s ", get_attr_##ATTR (insn) ? #ATTR : "");
13174 PRINT_OOO_ATTR (ooo_cracked);
13175 PRINT_OOO_ATTR (ooo_expanded);
13176 PRINT_OOO_ATTR (ooo_endgroup);
13177 PRINT_OOO_ATTR (ooo_groupalone);
13178 #undef PRINT_OOO_ATTR
13179 fprintf (file, "\n");
13180 fprintf (file, "sched state: %d\n", s390_sched_state);
13181 }
13182 }
13183
13184 if (GET_CODE (PATTERN (insn)) != USE
13185 && GET_CODE (PATTERN (insn)) != CLOBBER)
13186 return more - 1;
13187 else
13188 return more;
13189 }
13190
13191 static void
13192 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
13193 int verbose ATTRIBUTE_UNUSED,
13194 int max_ready ATTRIBUTE_UNUSED)
13195 {
13196 last_scheduled_insn = NULL;
13197 s390_sched_state = 0;
13198 }
13199
13200 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
13201 how many times struct loop *loop should be unrolled when tuning for CPUs
13202 with a built-in stride prefetcher.
13203 The loop body is analyzed for memory accesses by walking every rtx of
13204 the loop. Depending on the loop_depth and the number of memory
13205 accesses a new number <= nunroll is returned to improve the
13206 behaviour of the hardware prefetch unit. */
13207 static unsigned
13208 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
13209 {
13210 basic_block *bbs;
13211 rtx_insn *insn;
13212 unsigned i;
13213 unsigned mem_count = 0;
13214
13215 if (s390_tune != PROCESSOR_2097_Z10
13216 && s390_tune != PROCESSOR_2817_Z196
13217 && s390_tune != PROCESSOR_2827_ZEC12
13218 && s390_tune != PROCESSOR_2964_Z13)
13219 return nunroll;
13220
13221 /* Count the number of memory references within the loop body. */
13222 bbs = get_loop_body (loop);
13223 subrtx_iterator::array_type array;
13224 for (i = 0; i < loop->num_nodes; i++)
13225 FOR_BB_INSNS (bbs[i], insn)
13226 if (INSN_P (insn) && INSN_CODE (insn) != -1)
13227 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
13228 if (MEM_P (*iter))
13229 mem_count += 1;
13230 free (bbs);
13231
13232 /* Prevent division by zero; nunroll need not be adjusted in this case. */
13233 if (mem_count == 0)
13234 return nunroll;
13235
13236 switch (loop_depth (loop))
13237 {
13238 case 1:
13239 return MIN (nunroll, 28 / mem_count);
13240 case 2:
13241 return MIN (nunroll, 22 / mem_count);
13242 default:
13243 return MIN (nunroll, 16 / mem_count);
13244 }
13245 }
13246
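/* Worked example for the adjustment above (numbers invented for
   illustration): a depth-1 loop containing 7 memory references, tuned
   for z10 or later, yields MIN (nunroll, 28 / 7) = MIN (nunroll, 4),
   so a requested unroll factor of 8 is reduced to 4 to keep the
   hardware stride prefetcher effective.  */
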
13247 static void
13248 s390_option_override (void)
13249 {
13250 unsigned int i;
13251 cl_deferred_option *opt;
13252 vec<cl_deferred_option> *v =
13253 (vec<cl_deferred_option> *) s390_deferred_options;
13254
13255 if (v)
13256 FOR_EACH_VEC_ELT (*v, i, opt)
13257 {
13258 switch (opt->opt_index)
13259 {
13260 case OPT_mhotpatch_:
13261 {
13262 int val1;
13263 int val2;
13264 char s[256];
13265 char *t;
13266
13267 strncpy (s, opt->arg, 256);
13268 s[255] = 0;
13269 t = strchr (s, ',');
13270 if (t != NULL)
13271 {
13272 *t = 0;
13273 t++;
13274 val1 = integral_argument (s);
13275 val2 = integral_argument (t);
13276 }
13277 else
13278 {
13279 val1 = -1;
13280 val2 = -1;
13281 }
13282 if (val1 == -1 || val2 == -1)
13283 {
13284 /* Argument is not a plain number.  */
13285 error ("arguments to %qs should be non-negative integers",
13286 "-mhotpatch=n,m");
13287 break;
13288 }
13289 else if (val1 > s390_hotpatch_hw_max
13290 || val2 > s390_hotpatch_hw_max)
13291 {
13292 error ("argument to %qs is too large (max. %d)",
13293 "-mhotpatch=n,m", s390_hotpatch_hw_max);
13294 break;
13295 }
13296 s390_hotpatch_hw_before_label = val1;
13297 s390_hotpatch_hw_after_label = val2;
13298 break;
13299 }
13300 default:
13301 gcc_unreachable ();
13302 }
13303 }
13304
13305 /* Set up function hooks. */
13306 init_machine_status = s390_init_machine_status;
13307
13308 /* Architecture mode defaults according to ABI. */
13309 if (!(target_flags_explicit & MASK_ZARCH))
13310 {
13311 if (TARGET_64BIT)
13312 target_flags |= MASK_ZARCH;
13313 else
13314 target_flags &= ~MASK_ZARCH;
13315 }
13316
13317 /* Set the march default in case it hasn't been specified on
13318 the command line.  */
13319 if (s390_arch == PROCESSOR_max)
13320 {
13321 s390_arch_string = TARGET_ZARCH ? "z900" : "g5";
13322 s390_arch = TARGET_ZARCH ? PROCESSOR_2064_Z900 : PROCESSOR_9672_G5;
13323 s390_arch_flags = processor_flags_table[(int)s390_arch];
13324 }
13325
13326 /* Determine processor to tune for. */
13327 if (s390_tune == PROCESSOR_max)
13328 {
13329 s390_tune = s390_arch;
13330 s390_tune_flags = s390_arch_flags;
13331 }
13332
13333 /* Sanity checks. */
13334 if (s390_arch == PROCESSOR_NATIVE || s390_tune == PROCESSOR_NATIVE)
13335 gcc_unreachable ();
13336 if (TARGET_ZARCH && !TARGET_CPU_ZARCH)
13337 error ("z/Architecture mode not supported on %s", s390_arch_string);
13338 if (TARGET_64BIT && !TARGET_ZARCH)
13339 error ("64-bit ABI not supported in ESA/390 mode");
13340
13341 /* Use hardware DFP if available and not explicitly disabled by
13342 user. E.g. with -m31 -march=z10 -mzarch */
13343 if (!(target_flags_explicit & MASK_HARD_DFP) && TARGET_DFP)
13344 target_flags |= MASK_HARD_DFP;
13345
13346 /* Enable hardware transactions if available and not explicitly
13347 disabled by user. E.g. with -m31 -march=zEC12 -mzarch */
13348 if (!(target_flags_explicit & MASK_OPT_HTM) && TARGET_CPU_HTM && TARGET_ZARCH)
13349 target_flags |= MASK_OPT_HTM;
13350
13351 if (target_flags_explicit & MASK_OPT_VX)
13352 {
13353 if (TARGET_OPT_VX)
13354 {
13355 if (!TARGET_CPU_VX)
13356 error ("hardware vector support not available on %s",
13357 s390_arch_string);
13358 if (TARGET_SOFT_FLOAT)
13359 error ("hardware vector support not available with -msoft-float");
13360 }
13361 }
13362 else if (TARGET_CPU_VX)
13363 /* Enable vector support if available and not explicitly disabled
13364 by user. E.g. with -m31 -march=z13 -mzarch */
13365 target_flags |= MASK_OPT_VX;
13366
13367 if (TARGET_HARD_DFP && !TARGET_DFP)
13368 {
13369 if (target_flags_explicit & MASK_HARD_DFP)
13370 {
13371 if (!TARGET_CPU_DFP)
13372 error ("hardware decimal floating point instructions"
13373 " not available on %s", s390_arch_string);
13374 if (!TARGET_ZARCH)
13375 error ("hardware decimal floating point instructions"
13376 " not available in ESA/390 mode");
13377 }
13378 else
13379 target_flags &= ~MASK_HARD_DFP;
13380 }
13381
13382 if ((target_flags_explicit & MASK_SOFT_FLOAT) && TARGET_SOFT_FLOAT)
13383 {
13384 if ((target_flags_explicit & MASK_HARD_DFP) && TARGET_HARD_DFP)
13385 error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
13386
13387 target_flags &= ~MASK_HARD_DFP;
13388 }
13389
13390 /* Set processor cost function. */
13391 switch (s390_tune)
13392 {
13393 case PROCESSOR_2084_Z990:
13394 s390_cost = &z990_cost;
13395 break;
13396 case PROCESSOR_2094_Z9_109:
13397 s390_cost = &z9_109_cost;
13398 break;
13399 case PROCESSOR_2097_Z10:
13400 s390_cost = &z10_cost;
13401 break;
13402 case PROCESSOR_2817_Z196:
13403 s390_cost = &z196_cost;
13404 break;
13405 case PROCESSOR_2827_ZEC12:
13406 case PROCESSOR_2964_Z13:
13407 s390_cost = &zEC12_cost;
13408 break;
13409 default:
13410 s390_cost = &z900_cost;
13411 }
13412
13413 if (TARGET_BACKCHAIN && TARGET_PACKED_STACK && TARGET_HARD_FLOAT)
13414 error ("-mbackchain -mpacked-stack -mhard-float are not supported "
13415 "in combination");
13416
13417 if (s390_stack_size)
13418 {
13419 if (s390_stack_guard >= s390_stack_size)
13420 error ("stack size must be greater than the stack guard value");
13421 else if (s390_stack_size > 1 << 16)
13422 error ("stack size must not be greater than 64k");
13423 }
13424 else if (s390_stack_guard)
13425 error ("-mstack-guard implies use of -mstack-size");
13426
13427 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
13428 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
13429 target_flags |= MASK_LONG_DOUBLE_128;
13430 #endif
13431
13432 if (s390_tune == PROCESSOR_2097_Z10
13433 || s390_tune == PROCESSOR_2817_Z196
13434 || s390_tune == PROCESSOR_2827_ZEC12
13435 || s390_tune == PROCESSOR_2964_Z13)
13436 {
13437 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
13438 global_options.x_param_values,
13439 global_options_set.x_param_values);
13440 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
13441 global_options.x_param_values,
13442 global_options_set.x_param_values);
13443 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
13444 global_options.x_param_values,
13445 global_options_set.x_param_values);
13446 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
13447 global_options.x_param_values,
13448 global_options_set.x_param_values);
13449 }
13450
13451 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
13452 global_options.x_param_values,
13453 global_options_set.x_param_values);
13454 /* Values for loop prefetching.  */
13455 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
13456 global_options.x_param_values,
13457 global_options_set.x_param_values);
13458 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
13459 global_options.x_param_values,
13460 global_options_set.x_param_values);
13461 /* s390 has more than 2 cache levels and their size is much larger.
13462 Since we are always running virtualized, assume that we only get a
13463 small part of the caches above L1. */
13464 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
13465 global_options.x_param_values,
13466 global_options_set.x_param_values);
13467 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
13468 global_options.x_param_values,
13469 global_options_set.x_param_values);
13470 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
13471 global_options.x_param_values,
13472 global_options_set.x_param_values);
13473
13474 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
13475 requires the arch flags to be evaluated already. Since prefetching
13476 is beneficial on s390, we enable it if available. */
13477 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
13478 flag_prefetch_loop_arrays = 1;
13479
13480 /* Use the alternative scheduling-pressure algorithm by default. */
13481 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
13482 global_options.x_param_values,
13483 global_options_set.x_param_values);
13484
13485 if (TARGET_TPF)
13486 {
13487 /* Don't emit DWARF3/4 unless specifically selected. The TPF
13488 debuggers do not yet support DWARF 3/4. */
13489 if (!global_options_set.x_dwarf_strict)
13490 dwarf_strict = 1;
13491 if (!global_options_set.x_dwarf_version)
13492 dwarf_version = 2;
13493 }
13494
13495 /* Register a target-specific optimization-and-lowering pass
13496 to run immediately before prologue and epilogue generation.
13497
13498 Registering the pass must be done at start up. It's
13499 convenient to do it here. */
13500 opt_pass *new_pass = new pass_s390_early_mach (g);
13501 struct register_pass_info insert_pass_s390_early_mach =
13502 {
13503 new_pass, /* pass */
13504 "pro_and_epilogue", /* reference_pass_name */
13505 1, /* ref_pass_instance_number */
13506 PASS_POS_INSERT_BEFORE /* pos_op */
13507 };
13508 register_pass (&insert_pass_s390_early_mach);
13509 }
13510
13511 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
13512
13513 static bool
13514 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
13515 unsigned int align ATTRIBUTE_UNUSED,
13516 enum by_pieces_operation op ATTRIBUTE_UNUSED,
13517 bool speed_p ATTRIBUTE_UNUSED)
13518 {
13519 return (size == 1 || size == 2
13520 || size == 4 || (TARGET_ZARCH && size == 8));
13521 }
13522
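/* Editorial illustration of the hook above (hypothetical snippet, not
   from this file): a call such as

     __builtin_memcpy (&a, &b, 8);

   may be expanded "by pieces" as a single 8-byte load and store when
   TARGET_ZARCH, whereas in ESA/390 mode only 1-, 2- and 4-byte
   operations qualify and larger copies fall back to the normal block
   move expansion (e.g. an MVC).  */
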
13523 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13524
13525 static void
13526 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13527 {
13528 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
13529 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
13530 tree call_efpc = build_call_expr (efpc, 0);
13531 tree fenv_var = create_tmp_var (unsigned_type_node);
13532
13533 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
13534 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
13535 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
13536 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
13537 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
13538 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
13539
13540 /* Generates the equivalent of feholdexcept (&fenv_var)
13541
13542 fenv_var = __builtin_s390_efpc ();
13543 __builtin_s390_sfpc (fenv_var & mask) */
13544 tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
13545 tree new_fpc =
13546 build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13547 build_int_cst (unsigned_type_node,
13548 ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
13549 FPC_EXCEPTION_MASK)));
13550 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
13551 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
13552
13553 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
13554
13555 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
13556 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
13557 build_int_cst (unsigned_type_node,
13558 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
13559 *clear = build_call_expr (sfpc, 1, new_fpc);
13560
13561 /* Generates the equivalent of feupdateenv (fenv_var)
13562
13563 old_fpc = __builtin_s390_efpc ();
13564 __builtin_s390_sfpc (fenv_var);
13565 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
13566
13567 old_fpc = create_tmp_var (unsigned_type_node);
13568 tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
13569 old_fpc, call_efpc);
13570
13571 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
13572
13573 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
13574 build_int_cst (unsigned_type_node,
13575 FPC_FLAGS_MASK));
13576 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
13577 build_int_cst (unsigned_type_node,
13578 FPC_FLAGS_SHIFT));
13579 tree atomic_feraiseexcept
13580 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13581 raise_old_except = build_call_expr (atomic_feraiseexcept,
13582 1, raise_old_except);
13583
13584 *update = build2 (COMPOUND_EXPR, void_type_node,
13585 build2 (COMPOUND_EXPR, void_type_node,
13586 store_old_fpc, set_new_fpc),
13587 raise_old_except);
13588
13589 #undef FPC_EXCEPTION_MASK
13590 #undef FPC_FLAGS_MASK
13591 #undef FPC_DXC_MASK
13592 #undef FPC_EXCEPTION_MASK_SHIFT
13593 #undef FPC_FLAGS_SHIFT
13594 #undef FPC_DXC_SHIFT
13595 }
13596
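/* Editorial note (usage example invented for illustration): the
   hold/clear/update sequences built above are wrapped around C11 atomic
   compound assignments on floating-point types, e.g.

     _Atomic double d;
     d += 1.0;

   so that FP exception flags raised by failed compare-and-swap
   iterations are discarded and only the flags of the final, successful
   iteration are re-raised via __atomic_feraiseexcept.  */
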
13597 /* Return the vector mode to be used for inner mode MODE when doing
13598 vectorization. */
13599 static machine_mode
13600 s390_preferred_simd_mode (machine_mode mode)
13601 {
13602 if (TARGET_VX)
13603 switch (mode)
13604 {
13605 case DFmode:
13606 return V2DFmode;
13607 case DImode:
13608 return V2DImode;
13609 case SImode:
13610 return V4SImode;
13611 case HImode:
13612 return V8HImode;
13613 case QImode:
13614 return V16QImode;
13615 default:;
13616 }
13617 return word_mode;
13618 }
13619
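/* Editorial illustration (assumed options, loop invented): with
   -march=z13 -mvx a loop such as

     void f (double *restrict a, double *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] = a[i] + b[i];
     }

   is offered V2DFmode by the hook above, i.e. two doubles per 128-bit
   vector register; without TARGET_VX the hook falls back to word_mode
   and the loop is left scalar.  */
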
13620 /* Our hardware does not require vectors to be strictly aligned. */
13621 static bool
13622 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
13623 const_tree type ATTRIBUTE_UNUSED,
13624 int misalignment ATTRIBUTE_UNUSED,
13625 bool is_packed ATTRIBUTE_UNUSED)
13626 {
13627 return true;
13628 }
13629
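/* Editorial note (not in the original sources): because the hook above
   unconditionally returns true, the vectorizer does not need to peel or
   version loops merely to obtain aligned accesses; the vector load and
   store instructions used for such accesses tolerate unaligned
   operands.  */
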
13630 /* The vector ABI requires vector types to be aligned on an 8 byte
13631 boundary (our stack alignment). However, we allow this to be
13632 overridden by the user, although this definitely breaks the ABI. */
13633 static HOST_WIDE_INT
13634 s390_vector_alignment (const_tree type)
13635 {
13636 if (!TARGET_VX_ABI)
13637 return default_vector_alignment (type);
13638
13639 if (TYPE_USER_ALIGN (type))
13640 return TYPE_ALIGN (type);
13641
13642 return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
13643 }
13644
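/* Worked example for the function above (type invented for
   illustration): under the vector ABI a type such as

     typedef int v4si __attribute__ ((vector_size (16)));

   is given 64-bit (8-byte) alignment, matching the stack alignment,
   although its natural alignment would be 128 bits.  Declaring it with
   __attribute__ ((aligned (16))) sets TYPE_USER_ALIGN and the full
   128-bit request is honoured, at the cost of deviating from the
   documented ABI.  */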
13645
13646 /* Return true if TYPE is a vector bool type. */
13647 static inline bool
13648 s390_vector_bool_type_p (const_tree type)
13649 {
13650 return TYPE_VECTOR_OPAQUE (type);
13651 }
13652
13653 /* Return the diagnostic message string if the binary operation OP is
13654 not permitted on TYPE1 and TYPE2, NULL otherwise. */
13655 static const char*
13656 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
13657 {
13658 bool bool1_p, bool2_p;
13659 bool plusminus_p;
13660 bool muldiv_p;
13661 bool compare_p;
13662 machine_mode mode1, mode2;
13663
13664 if (!TARGET_ZVECTOR)
13665 return NULL;
13666
13667 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
13668 return NULL;
13669
13670 bool1_p = s390_vector_bool_type_p (type1);
13671 bool2_p = s390_vector_bool_type_p (type2);
13672
13673 /* Mixing signed and unsigned types is forbidden for all
13674 operators. */
13675 if (!bool1_p && !bool2_p
13676 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
13677 return N_("types differ in signedness");
13678
13679 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
13680 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
13681 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
13682 || op == ROUND_DIV_EXPR);
13683 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
13684 || op == EQ_EXPR || op == NE_EXPR);
13685
13686 if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
13687 return N_("binary operator does not support two vector bool operands");
13688
13689 if (bool1_p != bool2_p && (muldiv_p || compare_p))
13690 return N_("binary operator does not support vector bool operand");
13691
13692 mode1 = TYPE_MODE (type1);
13693 mode2 = TYPE_MODE (type2);
13694
13695 if (bool1_p != bool2_p && plusminus_p
13696 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
13697 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
13698 return N_("binary operator does not support mixing vector "
13699 "bool with floating point vector operands");
13700
13701 return NULL;
13702 }
13703
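/* Editorial illustration of the diagnostics above (snippets invented,
   assuming -mzvector on a vector-capable target):

     vector signed int a;
     vector unsigned int b;
     a + b;     -> "types differ in signedness"

     vector bool int c, d;
     c * d;     -> "binary operator does not support two vector bool
                    operands"

   Comparing or multiplying a vector bool operand with a non-bool vector
   operand is rejected with "binary operator does not support vector
   bool operand".  */
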
13704 /* Initialize GCC target structure. */
13705
13706 #undef TARGET_ASM_ALIGNED_HI_OP
13707 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
13708 #undef TARGET_ASM_ALIGNED_DI_OP
13709 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
13710 #undef TARGET_ASM_INTEGER
13711 #define TARGET_ASM_INTEGER s390_assemble_integer
13712
13713 #undef TARGET_ASM_OPEN_PAREN
13714 #define TARGET_ASM_OPEN_PAREN ""
13715
13716 #undef TARGET_ASM_CLOSE_PAREN
13717 #define TARGET_ASM_CLOSE_PAREN ""
13718
13719 #undef TARGET_OPTION_OVERRIDE
13720 #define TARGET_OPTION_OVERRIDE s390_option_override
13721
13722 #undef TARGET_ENCODE_SECTION_INFO
13723 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
13724
13725 #undef TARGET_SCALAR_MODE_SUPPORTED_P
13726 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
13727
13728 #ifdef HAVE_AS_TLS
13729 #undef TARGET_HAVE_TLS
13730 #define TARGET_HAVE_TLS true
13731 #endif
13732 #undef TARGET_CANNOT_FORCE_CONST_MEM
13733 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
13734
13735 #undef TARGET_DELEGITIMIZE_ADDRESS
13736 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
13737
13738 #undef TARGET_LEGITIMIZE_ADDRESS
13739 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
13740
13741 #undef TARGET_RETURN_IN_MEMORY
13742 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
13743
13744 #undef TARGET_INIT_BUILTINS
13745 #define TARGET_INIT_BUILTINS s390_init_builtins
13746 #undef TARGET_EXPAND_BUILTIN
13747 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
13748 #undef TARGET_BUILTIN_DECL
13749 #define TARGET_BUILTIN_DECL s390_builtin_decl
13750
13751 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
13752 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
13753
13754 #undef TARGET_ASM_OUTPUT_MI_THUNK
13755 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
13756 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
13757 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
13758
13759 #undef TARGET_SCHED_ADJUST_PRIORITY
13760 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
13761 #undef TARGET_SCHED_ISSUE_RATE
13762 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
13763 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
13764 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
13765
13766 #undef TARGET_SCHED_VARIABLE_ISSUE
13767 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
13768 #undef TARGET_SCHED_REORDER
13769 #define TARGET_SCHED_REORDER s390_sched_reorder
13770 #undef TARGET_SCHED_INIT
13771 #define TARGET_SCHED_INIT s390_sched_init
13772
13773 #undef TARGET_CANNOT_COPY_INSN_P
13774 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
13775 #undef TARGET_RTX_COSTS
13776 #define TARGET_RTX_COSTS s390_rtx_costs
13777 #undef TARGET_ADDRESS_COST
13778 #define TARGET_ADDRESS_COST s390_address_cost
13779 #undef TARGET_REGISTER_MOVE_COST
13780 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
13781 #undef TARGET_MEMORY_MOVE_COST
13782 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
13783
13784 #undef TARGET_MACHINE_DEPENDENT_REORG
13785 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
13786
13787 #undef TARGET_VALID_POINTER_MODE
13788 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
13789
13790 #undef TARGET_BUILD_BUILTIN_VA_LIST
13791 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
13792 #undef TARGET_EXPAND_BUILTIN_VA_START
13793 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
13794 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
13795 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
13796
13797 #undef TARGET_PROMOTE_FUNCTION_MODE
13798 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
13799 #undef TARGET_PASS_BY_REFERENCE
13800 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
13801
13802 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
13803 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
13804 #undef TARGET_FUNCTION_ARG
13805 #define TARGET_FUNCTION_ARG s390_function_arg
13806 #undef TARGET_FUNCTION_ARG_ADVANCE
13807 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
13808 #undef TARGET_FUNCTION_VALUE
13809 #define TARGET_FUNCTION_VALUE s390_function_value
13810 #undef TARGET_LIBCALL_VALUE
13811 #define TARGET_LIBCALL_VALUE s390_libcall_value
13812 #undef TARGET_STRICT_ARGUMENT_NAMING
13813 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
13814
13815 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
13816 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
13817
13818 #undef TARGET_FIXED_CONDITION_CODE_REGS
13819 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
13820
13821 #undef TARGET_CC_MODES_COMPATIBLE
13822 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
13823
13824 #undef TARGET_INVALID_WITHIN_DOLOOP
13825 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
13826
13827 #ifdef HAVE_AS_TLS
13828 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
13829 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
13830 #endif
13831
13832 #undef TARGET_DWARF_FRAME_REG_MODE
13833 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
13834
13835 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
13836 #undef TARGET_MANGLE_TYPE
13837 #define TARGET_MANGLE_TYPE s390_mangle_type
13838 #endif
13839
13840 #undef TARGET_SCALAR_MODE_SUPPORTED_P
13841 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
13842
13843 #undef TARGET_VECTOR_MODE_SUPPORTED_P
13844 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
13845
13846 #undef TARGET_PREFERRED_RELOAD_CLASS
13847 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
13848
13849 #undef TARGET_SECONDARY_RELOAD
13850 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
13851
13852 #undef TARGET_LIBGCC_CMP_RETURN_MODE
13853 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
13854
13855 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
13856 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
13857
13858 #undef TARGET_LEGITIMATE_ADDRESS_P
13859 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
13860
13861 #undef TARGET_LEGITIMATE_CONSTANT_P
13862 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
13863
13864 #undef TARGET_LRA_P
13865 #define TARGET_LRA_P s390_lra_p
13866
13867 #undef TARGET_CAN_ELIMINATE
13868 #define TARGET_CAN_ELIMINATE s390_can_eliminate
13869
13870 #undef TARGET_CONDITIONAL_REGISTER_USAGE
13871 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
13872
13873 #undef TARGET_LOOP_UNROLL_ADJUST
13874 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
13875
13876 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
13877 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
13878 #undef TARGET_TRAMPOLINE_INIT
13879 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
13880
13881 #undef TARGET_UNWIND_WORD_MODE
13882 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
13883
13884 #undef TARGET_CANONICALIZE_COMPARISON
13885 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
13886
13887 #undef TARGET_HARD_REGNO_SCRATCH_OK
13888 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
13889
13890 #undef TARGET_ATTRIBUTE_TABLE
13891 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
13892
13893 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
13894 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
13895
13896 #undef TARGET_SET_UP_BY_PROLOGUE
13897 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
13898
13899 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
13900 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
13901 s390_use_by_pieces_infrastructure_p
13902
13903 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
13904 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
13905
13906 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
13907 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
13908
13909 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
13910 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
13911
13912 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
13913 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
13914
13915 #undef TARGET_VECTOR_ALIGNMENT
13916 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
13917
13918 #undef TARGET_INVALID_BINARY_OP
13919 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
13920
13921 struct gcc_target targetm = TARGET_INITIALIZER;
13922
13923 #include "gt-s390.h"