gcc/config/sh/sh.c
1 /* Output routines for GCC for Hitachi / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
6
7 This file is part of GNU CC.
8
9 GNU CC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
13
14 GNU CC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GNU CC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "function.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "c-pragma.h"
42 #include "integrate.h"
43 #include "tm_p.h"
44 #include "target.h"
45 #include "target-def.h"
46 #include "real.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
49 #include "ra.h"
50 #include "cfglayout.h"
51
52 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
53
54 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
55 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
56
57 /* These are some macros to abstract register modes. */
58 #define CONST_OK_FOR_ADD(size) \
59 (TARGET_SHMEDIA ? CONST_OK_FOR_P (size) : CONST_OK_FOR_I (size))
60 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
61 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
62 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
63
64 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
65 int current_function_interrupt;
66
67 /* ??? The pragma interrupt support will not work for SH3. */
68 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
69 output code for the next function appropriate for an interrupt handler. */
70 int pragma_interrupt;
71
72 /* This is set by the trap_exit attribute for functions. It specifies
73 a trap number to be used in a trapa instruction at function exit
74 (instead of an rte instruction). */
75 int trap_exit;
76
77 /* This is used by the sp_switch attribute for functions. It specifies
78 a variable holding the address of the stack the interrupt function
79 should switch to/from at entry/exit. */
80 rtx sp_switch;
81
82 /* This is set by #pragma trapa, and is similar to the above, except that
83 the compiler doesn't emit code to preserve all registers. */
84 static int pragma_trapa;
85
86 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
87 which has a separate set of low regs for User and Supervisor modes.
88 This should only be used for the lowest level of interrupts. Higher levels
89 of interrupts must save the registers in case they themselves are
90 interrupted. */
91 int pragma_nosave_low_regs;
92
93 /* This is used for communication between SETUP_INCOMING_VARARGS and
94 sh_expand_prologue. */
95 int current_function_anonymous_args;
96
97 /* Global variables for machine-dependent things. */
98
99 /* Which CPU we are scheduling for. */
100 enum processor_type sh_cpu;
101
102 /* Saved operands from the last compare to use when we generate an scc
103 or bcc insn. */
104
105 rtx sh_compare_op0;
106 rtx sh_compare_op1;
107
108 /* Provides the class number of the smallest class containing
109 reg number. */
110
111 int regno_reg_class[FIRST_PSEUDO_REGISTER] =
112 {
113 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
114 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
130 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
146 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
147 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
148 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
149 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
150 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
151 GENERAL_REGS,
152 };
153
154 char sh_register_names[FIRST_PSEUDO_REGISTER] \
155 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
156
157 char sh_additional_register_names[ADDREGNAMES_SIZE] \
158 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
159 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
160
161 /* Provide reg_class from a letter such as appears in the machine
162 description.  An extra `*' marks a target-independently reserved letter.
163 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
164
165 enum reg_class reg_class_from_letter[] =
166 {
167 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
168 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
169 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
170 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
171 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
172 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
173 /* y */ FPUL_REGS, /* z */ R0_REGS
174 };
175
176 int assembler_dialect;
177
178 static void split_branches PARAMS ((rtx));
179 static int branch_dest PARAMS ((rtx));
180 static void force_into PARAMS ((rtx, rtx));
181 static void print_slot PARAMS ((rtx));
182 static rtx add_constant PARAMS ((rtx, enum machine_mode, rtx));
183 static void dump_table PARAMS ((rtx));
184 static int hi_const PARAMS ((rtx));
185 static int broken_move PARAMS ((rtx));
186 static int mova_p PARAMS ((rtx));
187 static rtx find_barrier PARAMS ((int, rtx, rtx));
188 static int noncall_uses_reg PARAMS ((rtx, rtx, rtx *));
189 static rtx gen_block_redirect PARAMS ((rtx, int, int));
190 static void sh_reorg PARAMS ((void));
191 static void output_stack_adjust PARAMS ((int, rtx, int, rtx (*) (rtx)));
192 static rtx frame_insn PARAMS ((rtx));
193 static rtx push PARAMS ((int));
194 static void pop PARAMS ((int));
195 static void push_regs PARAMS ((HARD_REG_SET *, int));
196 static int calc_live_regs PARAMS ((HARD_REG_SET *));
197 static void mark_use PARAMS ((rtx, rtx *));
198 static HOST_WIDE_INT rounded_frame_size PARAMS ((int));
199 static rtx mark_constant_pool_use PARAMS ((rtx));
200 const struct attribute_spec sh_attribute_table[];
201 static tree sh_handle_interrupt_handler_attribute PARAMS ((tree *, tree, tree, int, bool *));
202 static tree sh_handle_sp_switch_attribute PARAMS ((tree *, tree, tree, int, bool *));
203 static tree sh_handle_trap_exit_attribute PARAMS ((tree *, tree, tree, int, bool *));
204 static void sh_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
205 static void sh_insert_attributes PARAMS ((tree, tree *));
206 static int sh_adjust_cost PARAMS ((rtx, rtx, rtx, int));
207 static int sh_use_dfa_interface PARAMS ((void));
208 static int sh_issue_rate PARAMS ((void));
209 static bool sh_function_ok_for_sibcall PARAMS ((tree, tree));
210
211 static bool sh_cannot_modify_jumps_p PARAMS ((void));
212 static bool sh_ms_bitfield_layout_p PARAMS ((tree));
213
214 static void sh_init_builtins PARAMS ((void));
215 static void sh_media_init_builtins PARAMS ((void));
216 static rtx sh_expand_builtin PARAMS ((tree, rtx, rtx, enum machine_mode, int));
217 static void sh_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
218 HOST_WIDE_INT, tree));
219 static int flow_dependent_p PARAMS ((rtx, rtx));
220 static void flow_dependent_p_1 PARAMS ((rtx, rtx, void *));
221 static int shiftcosts PARAMS ((rtx));
222 static int andcosts PARAMS ((rtx));
223 static int addsubcosts PARAMS ((rtx));
224 static int multcosts PARAMS ((rtx));
225 static bool unspec_caller_rtx_p PARAMS ((rtx));
226 static bool sh_cannot_copy_insn_p PARAMS ((rtx));
227 static bool sh_rtx_costs PARAMS ((rtx, int, int, int *));
228 static int sh_address_cost PARAMS ((rtx));
229 \f
230 /* Initialize the GCC target structure. */
231 #undef TARGET_ATTRIBUTE_TABLE
232 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
233
234 /* The next two are used for debug info when compiling with -gdwarf. */
235 #undef TARGET_ASM_UNALIGNED_HI_OP
236 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
237 #undef TARGET_ASM_UNALIGNED_SI_OP
238 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
239
240 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
241 #undef TARGET_ASM_UNALIGNED_DI_OP
242 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
243 #undef TARGET_ASM_ALIGNED_DI_OP
244 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
245
246 #undef TARGET_ASM_FUNCTION_EPILOGUE
247 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
248
249 #undef TARGET_ASM_OUTPUT_MI_THUNK
250 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
251
252 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
253 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
254
255 #undef TARGET_INSERT_ATTRIBUTES
256 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
257
258 #undef TARGET_SCHED_ADJUST_COST
259 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
260
261 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
262 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
263 sh_use_dfa_interface
264 #undef TARGET_SCHED_ISSUE_RATE
265 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
266
267 #undef TARGET_CANNOT_MODIFY_JUMPS_P
268 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
269
270 #undef TARGET_MS_BITFIELD_LAYOUT_P
271 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
272
273 #undef TARGET_INIT_BUILTINS
274 #define TARGET_INIT_BUILTINS sh_init_builtins
275 #undef TARGET_EXPAND_BUILTIN
276 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
277
278 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
279 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
280
281 #undef TARGET_CANNOT_COPY_INSN_P
282 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
283 #undef TARGET_RTX_COSTS
284 #define TARGET_RTX_COSTS sh_rtx_costs
285 #undef TARGET_ADDRESS_COST
286 #define TARGET_ADDRESS_COST sh_address_cost
287
288 #undef TARGET_MACHINE_DEPENDENT_REORG
289 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
290
291 #ifdef HAVE_AS_TLS
292 #undef TARGET_HAVE_TLS
293 #define TARGET_HAVE_TLS true
294 #endif
295
296 struct gcc_target targetm = TARGET_INITIALIZER;
297 \f
298 /* Print the operand address in x to the stream. */
299
300 void
301 print_operand_address (stream, x)
302 FILE *stream;
303 rtx x;
304 {
305 switch (GET_CODE (x))
306 {
307 case REG:
308 case SUBREG:
309 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
310 break;
311
312 case PLUS:
313 {
314 rtx base = XEXP (x, 0);
315 rtx index = XEXP (x, 1);
316
317 switch (GET_CODE (index))
318 {
319 case CONST_INT:
320 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
321 reg_names[true_regnum (base)]);
322 break;
323
324 case REG:
325 case SUBREG:
326 {
327 int base_num = true_regnum (base);
328 int index_num = true_regnum (index);
329
330 fprintf (stream, "@(r0,%s)",
331 reg_names[MAX (base_num, index_num)]);
332 break;
333 }
334
335 default:
336 debug_rtx (x);
337 abort ();
338 }
339 }
340 break;
341
342 case PRE_DEC:
343 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
344 break;
345
346 case POST_INC:
347 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
348 break;
349
350 default:
351 x = mark_constant_pool_use (x);
352 output_addr_const (stream, x);
353 break;
354 }
355 }
356
357 /* Print operand x (an rtx) in assembler syntax to file stream
358 according to modifier code.
359
360 '.' print a .s if insn needs delay slot
361 ',' print LOCAL_LABEL_PREFIX
362 '@' print trap, rte or rts depending upon pragma interruptness
363 '#' output a nop if there is nothing to put in the delay slot
364 ''' print likelihood suffix (/u for unlikely).
365 'O' print a constant without the #
366 'R' print the LSW of a dp value - changes if in little endian
367 'S' print the MSW of a dp value - changes if in little endian
368 'T' print the next word of a dp value - same as 'R' in big endian mode.
369 'M' print an `x' if `m' will print `base,index'.
370 'N' print 'r63' if the operand is (const_int 0).
371 'm' print a pair `base,offset' or `base,index', for LD and ST.
372 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
373 'o' output an operator. */
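/* For example, with a DImode value in the register pair r0/r1, `%S0' prints
   the register holding the most significant word and `%R0' the least
   significant one: r0 and r1 respectively on a big-endian target, the
   reverse on little-endian (see the MSW/LSW macros above). */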
374
375 void
376 print_operand (stream, x, code)
377 FILE *stream;
378 rtx x;
379 int code;
380 {
381 switch (code)
382 {
383 case '.':
384 if (final_sequence
385 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
386 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
387 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
388 break;
389 case ',':
390 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
391 break;
392 case '@':
393 if (trap_exit)
394 fprintf (stream, "trapa #%d", trap_exit);
395 else if (sh_cfun_interrupt_handler_p ())
396 fprintf (stream, "rte");
397 else
398 fprintf (stream, "rts");
399 break;
400 case '#':
401 /* Output a nop if there's nothing in the delay slot. */
402 if (dbr_sequence_length () == 0)
403 fprintf (stream, "\n\tnop");
404 break;
405 case '\'':
406 {
407 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
408
409 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
410 fputs ("/u", stream);
411 break;
412 }
413 case 'O':
414 x = mark_constant_pool_use (x);
415 output_addr_const (stream, x);
416 break;
417 case 'R':
418 fputs (reg_names[REGNO (x) + LSW], (stream));
419 break;
420 case 'S':
421 fputs (reg_names[REGNO (x) + MSW], (stream));
422 break;
423 case 'T':
424 /* Next word of a double. */
425 switch (GET_CODE (x))
426 {
427 case REG:
428 fputs (reg_names[REGNO (x) + 1], (stream));
429 break;
430 case MEM:
431 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
432 && GET_CODE (XEXP (x, 0)) != POST_INC)
433 x = adjust_address (x, SImode, 4);
434 print_operand_address (stream, XEXP (x, 0));
435 break;
436 default:
437 break;
438 }
439 break;
440 case 'o':
441 switch (GET_CODE (x))
442 {
443 case PLUS: fputs ("add", stream); break;
444 case MINUS: fputs ("sub", stream); break;
445 case MULT: fputs ("mul", stream); break;
446 case DIV: fputs ("div", stream); break;
447 case EQ: fputs ("eq", stream); break;
448 case NE: fputs ("ne", stream); break;
449 case GT: case LT: fputs ("gt", stream); break;
450 case GE: case LE: fputs ("ge", stream); break;
451 case GTU: case LTU: fputs ("gtu", stream); break;
452 case GEU: case LEU: fputs ("geu", stream); break;
453 default:
454 break;
455 }
456 break;
457 case 'M':
458 if (GET_CODE (x) == MEM
459 && GET_CODE (XEXP (x, 0)) == PLUS
460 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
461 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
462 fputc ('x', stream);
463 break;
464
465 case 'm':
466 if (GET_CODE (x) != MEM)
467 abort ();
468 x = XEXP (x, 0);
469 switch (GET_CODE (x))
470 {
471 case REG:
472 case SUBREG:
473 print_operand (stream, x, 0);
474 fputs (", 0", stream);
475 break;
476
477 case PLUS:
478 print_operand (stream, XEXP (x, 0), 0);
479 fputs (", ", stream);
480 print_operand (stream, XEXP (x, 1), 0);
481 break;
482
483 default:
484 abort ();
485 }
486 break;
487
488 case 'N':
489 if (x == CONST0_RTX (GET_MODE (x)))
490 {
491 fprintf ((stream), "r63");
492 break;
493 }
494 goto default_output;
495 case 'u':
496 if (GET_CODE (x) == CONST_INT)
497 {
498 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
499 break;
500 }
501 /* Fall through. */
502
503 default_output:
504 default:
505 switch (GET_CODE (x))
506 {
507 /* FIXME: We need this on SHmedia32 because reload generates
508 some sign-extended HI or QI loads into DImode registers
509 but, because Pmode is SImode, the address ends up with a
510 subreg:SI of the DImode register. Maybe reload should be
511 fixed so as to apply alter_subreg to such loads? */
512 case SUBREG:
513 if (SUBREG_BYTE (x) != 0
514 || GET_CODE (SUBREG_REG (x)) != REG)
515 abort ();
516
517 x = SUBREG_REG (x);
518 /* Fall through. */
519
520 case REG:
521 if (FP_REGISTER_P (REGNO (x))
522 && GET_MODE (x) == V16SFmode)
523 fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
524 else if (FP_REGISTER_P (REGNO (x))
525 && GET_MODE (x) == V4SFmode)
526 fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
527 else if (GET_CODE (x) == REG
528 && GET_MODE (x) == V2SFmode)
529 fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
530 else if (FP_REGISTER_P (REGNO (x))
531 && GET_MODE_SIZE (GET_MODE (x)) > 4)
532 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
533 else
534 fputs (reg_names[REGNO (x)], (stream));
535 break;
536
537 case MEM:
538 output_address (XEXP (x, 0));
539 break;
540
541 case CONST:
542 if (TARGET_SHMEDIA
543 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
544 && GET_MODE (XEXP (x, 0)) == DImode
545 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
546 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
547 {
548 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
549
550 fputc ('(', stream);
551 if (GET_CODE (val) == ASHIFTRT)
552 {
553 fputc ('(', stream);
554 if (GET_CODE (XEXP (val, 0)) == CONST)
555 fputc ('(', stream);
556 output_addr_const (stream, XEXP (val, 0));
557 if (GET_CODE (XEXP (val, 0)) == CONST)
558 fputc (')', stream);
559 fputs (" >> ", stream);
560 output_addr_const (stream, XEXP (val, 1));
561 fputc (')', stream);
562 }
563 else
564 {
565 if (GET_CODE (val) == CONST)
566 fputc ('(', stream);
567 output_addr_const (stream, val);
568 if (GET_CODE (val) == CONST)
569 fputc (')', stream);
570 }
571 fputs (" & 65535)", stream);
572 break;
573 }
574
575 /* Fall through. */
576 default:
577 if (TARGET_SH1)
578 fputc ('#', stream);
579 output_addr_const (stream, x);
580 break;
581 }
582 break;
583 }
584 }
585 \f
586 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
587 static void
588 force_into (value, target)
589 rtx value, target;
590 {
591 value = force_operand (value, target);
592 if (! rtx_equal_p (value, target))
593 emit_insn (gen_move_insn (target, value));
594 }
595
596 /* Emit code to perform a block move. Choose the best method.
597
598 OPERANDS[0] is the destination.
599 OPERANDS[1] is the source.
600 OPERANDS[2] is the size.
601 OPERANDS[3] is the alignment safe to use. */
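/* For example, on TARGET_HARD_SH4 a constant 12-byte copy with 4-byte
   alignment is emitted below as a call to __movstrSI12_i4, with the
   destination address forced into r4 and the source address into r5. */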
602
603 int
604 expand_block_move (operands)
605 rtx *operands;
606 {
607 int align = INTVAL (operands[3]);
608 int constp = (GET_CODE (operands[2]) == CONST_INT);
609 int bytes = (constp ? INTVAL (operands[2]) : 0);
610
611 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
612 alignment, or if it isn't a multiple of 4 bytes, then fail. */
613 if (! constp || align < 4 || (bytes % 4 != 0))
614 return 0;
615
616 if (TARGET_HARD_SH4)
617 {
618 if (bytes < 12)
619 return 0;
620 else if (bytes == 12)
621 {
622 tree entry_name;
623 rtx sym;
624 rtx func_addr_rtx;
625 rtx r4 = gen_rtx (REG, SImode, 4);
626 rtx r5 = gen_rtx (REG, SImode, 5);
627
628 entry_name = get_identifier ("__movstrSI12_i4");
629
630 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
631 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
632 force_into (XEXP (operands[0], 0), r4);
633 force_into (XEXP (operands[1], 0), r5);
634 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
635 return 1;
636 }
637 else if (! TARGET_SMALLCODE)
638 {
639 tree entry_name;
640 rtx sym;
641 rtx func_addr_rtx;
642 int dwords;
643 rtx r4 = gen_rtx (REG, SImode, 4);
644 rtx r5 = gen_rtx (REG, SImode, 5);
645 rtx r6 = gen_rtx (REG, SImode, 6);
646
647 entry_name = get_identifier (bytes & 4
648 ? "__movstr_i4_odd"
649 : "__movstr_i4_even");
650 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
651 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
652 force_into (XEXP (operands[0], 0), r4);
653 force_into (XEXP (operands[1], 0), r5);
654
655 dwords = bytes >> 3;
656 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
657 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
658 return 1;
659 }
660 else
661 return 0;
662 }
663 if (bytes < 64)
664 {
665 char entry[30];
666 tree entry_name;
667 rtx sym;
668 rtx func_addr_rtx;
669 rtx r4 = gen_rtx_REG (SImode, 4);
670 rtx r5 = gen_rtx_REG (SImode, 5);
671
672 sprintf (entry, "__movstrSI%d", bytes);
673 entry_name = get_identifier (entry);
674 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
675 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
676 force_into (XEXP (operands[0], 0), r4);
677 force_into (XEXP (operands[1], 0), r5);
678 emit_insn (gen_block_move_real (func_addr_rtx));
679 return 1;
680 }
681
682 /* This is the same number of bytes as a memcpy call, but to a different,
683 less common function name, so this will occasionally use more space. */
684 if (! TARGET_SMALLCODE)
685 {
686 tree entry_name;
687 rtx sym;
688 rtx func_addr_rtx;
689 int final_switch, while_loop;
690 rtx r4 = gen_rtx_REG (SImode, 4);
691 rtx r5 = gen_rtx_REG (SImode, 5);
692 rtx r6 = gen_rtx_REG (SImode, 6);
693
694 entry_name = get_identifier ("__movstr");
695 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
696 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
697 force_into (XEXP (operands[0], 0), r4);
698 force_into (XEXP (operands[1], 0), r5);
699
700 /* r6 controls the size of the move. 16 is decremented from it
701 for each 64 bytes moved. Then the negative bit left over is used
702 as an index into a list of move instructions. e.g., a 72 byte move
703 would be set up with size(r6) = 14, for one iteration through the
704 big while loop, and a switch of -2 for the last part. */
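/* Checking that 72 byte example against the computation below:
   bytes / 4 == 18, so final_switch == 16 - (18 % 16) == 14 and
   while_loop == (18 / 16 - 1) * 16 == 0, giving r6 == 14. */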
705
706 final_switch = 16 - ((bytes / 4) % 16);
707 while_loop = ((bytes / 4) / 16 - 1) * 16;
708 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
709 emit_insn (gen_block_lump_real (func_addr_rtx));
710 return 1;
711 }
712
713 return 0;
714 }
715
716 /* Prepare operands for a move define_expand; specifically, one of the
717 operands must be in a register. */
718
719 int
720 prepare_move_operands (operands, mode)
721 rtx operands[];
722 enum machine_mode mode;
723 {
724 if ((mode == SImode || mode == DImode)
725 && flag_pic
726 && ! ((mode == Pmode || mode == ptr_mode)
727 && tls_symbolic_operand (operands[1], Pmode) != 0))
728 {
729 rtx temp;
730 if (SYMBOLIC_CONST_P (operands[1]))
731 {
732 if (GET_CODE (operands[0]) == MEM)
733 operands[1] = force_reg (Pmode, operands[1]);
734 else if (TARGET_SHMEDIA
735 && GET_CODE (operands[1]) == LABEL_REF
736 && target_reg_operand (operands[0], mode))
737 /* It's ok. */;
738 else
739 {
740 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
741 operands[1] = legitimize_pic_address (operands[1], mode, temp);
742 }
743 }
744 else if (GET_CODE (operands[1]) == CONST
745 && GET_CODE (XEXP (operands[1], 0)) == PLUS
746 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
747 {
748 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
749 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
750 mode, temp);
751 operands[1] = expand_binop (mode, add_optab, temp,
752 XEXP (XEXP (operands[1], 0), 1),
753 no_new_pseudos ? temp
754 : gen_reg_rtx (Pmode),
755 0, OPTAB_LIB_WIDEN);
756 }
757 }
758
759 if (! reload_in_progress && ! reload_completed)
760 {
761 /* Copy the source to a register if neither operand is a register. */
762 if (! register_operand (operands[0], mode)
763 && ! sh_register_operand (operands[1], mode))
764 operands[1] = copy_to_mode_reg (mode, operands[1]);
765
766 /* This case can happen while generating code to move the result
767 of a library call to the target. Reject `st r0,@(rX,rY)' because
768 reload will fail to find a spill register for rX, since r0 is already
769 being used for the source. */
770 else if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 0
771 && GET_CODE (operands[0]) == MEM
772 && GET_CODE (XEXP (operands[0], 0)) == PLUS
773 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
774 operands[1] = copy_to_mode_reg (mode, operands[1]);
775 }
776
777 if (mode == Pmode || mode == ptr_mode)
778 {
779 rtx op0, op1;
780 enum tls_model tls_kind;
781
782 op0 = operands[0];
783 op1 = operands[1];
784 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
785 {
786 rtx tga_op1, tga_ret, tmp, tmp2;
787
788
789 switch (tls_kind)
790 {
791 case TLS_MODEL_GLOBAL_DYNAMIC:
792 tga_ret = gen_rtx_REG (Pmode, R0_REG);
793 emit_insn (gen_tls_global_dynamic (tga_ret, op1));
794 op1 = tga_ret;
795 break;
796
797 case TLS_MODEL_LOCAL_DYNAMIC:
798 tga_ret = gen_rtx_REG (Pmode, R0_REG);
799 emit_insn (gen_tls_local_dynamic (tga_ret, op1));
800
801 tmp = gen_reg_rtx (Pmode);
802 emit_move_insn (tmp, tga_ret);
803
804 if (register_operand (op0, Pmode))
805 tmp2 = op0;
806 else
807 tmp2 = gen_reg_rtx (Pmode);
808
809 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
810 op1 = tmp2;
811 break;
812
813 case TLS_MODEL_INITIAL_EXEC:
814 if (! flag_pic)
815 emit_insn (gen_GOTaddr2picreg ());
816 tga_op1 = gen_reg_rtx (Pmode);
817 tmp = gen_sym2GOTTPOFF (op1);
818 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
819 op1 = tga_op1;
820 break;
821
822 case TLS_MODEL_LOCAL_EXEC:
823 tmp2 = gen_reg_rtx (Pmode);
824 emit_insn (gen_load_gbr (tmp2));
825 tmp = gen_reg_rtx (Pmode);
826 emit_insn (gen_symTPOFF2reg (tmp, op1));
827 RTX_UNCHANGING_P (tmp) = 1;
828
829 if (register_operand (op0, Pmode))
830 op1 = op0;
831 else
832 op1 = gen_reg_rtx (Pmode);
833
834 emit_insn (gen_addsi3 (op1, tmp, tmp2));
835 break;
836
837 default:
838 abort ();
839 }
840 operands[1] = op1;
841 }
842 }
843
844 return 0;
845 }
846
847 /* Prepare the operands for an scc instruction; make sure that the
848 compare has been done. */
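/* For example, an LT comparison is handled by swapping sh_compare_op0 and
   sh_compare_op1 and emitting a GT test; the result always lands in the
   T register, which is returned. */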
849 rtx
850 prepare_scc_operands (code)
851 enum rtx_code code;
852 {
853 rtx t_reg = gen_rtx_REG (SImode, T_REG);
854 enum rtx_code oldcode = code;
855 enum machine_mode mode;
856
857 /* First need a compare insn. */
858 switch (code)
859 {
860 case NE:
861 /* It isn't possible to handle this case. */
862 abort ();
863 case LT:
864 code = GT;
865 break;
866 case LE:
867 code = GE;
868 break;
869 case LTU:
870 code = GTU;
871 break;
872 case LEU:
873 code = GEU;
874 break;
875 default:
876 break;
877 }
878 if (code != oldcode)
879 {
880 rtx tmp = sh_compare_op0;
881 sh_compare_op0 = sh_compare_op1;
882 sh_compare_op1 = tmp;
883 }
884
885 mode = GET_MODE (sh_compare_op0);
886 if (mode == VOIDmode)
887 mode = GET_MODE (sh_compare_op1);
888
889 sh_compare_op0 = force_reg (mode, sh_compare_op0);
890 if ((code != EQ && code != NE
891 && (sh_compare_op1 != const0_rtx
892 || code == GTU || code == GEU || code == LTU || code == LEU))
893 || (mode == DImode && sh_compare_op1 != const0_rtx)
894 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
895 sh_compare_op1 = force_reg (mode, sh_compare_op1);
896
897 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
898 (mode == SFmode ? emit_sf_insn : emit_df_insn)
899 (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2,
900 gen_rtx (SET, VOIDmode, t_reg,
901 gen_rtx (code, SImode,
902 sh_compare_op0, sh_compare_op1)),
903 gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))));
904 else
905 emit_insn (gen_rtx (SET, VOIDmode, t_reg,
906 gen_rtx (code, SImode, sh_compare_op0,
907 sh_compare_op1)));
908
909 return t_reg;
910 }
911
912 /* Called from the md file, set up the operands of a compare instruction. */
913
914 void
915 from_compare (operands, code)
916 rtx *operands;
917 int code;
918 {
919 enum machine_mode mode = GET_MODE (sh_compare_op0);
920 rtx insn;
921 if (mode == VOIDmode)
922 mode = GET_MODE (sh_compare_op1);
923 if (code != EQ
924 || mode == DImode
925 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
926 {
927 /* Force args into regs, since we can't use constants here. */
928 sh_compare_op0 = force_reg (mode, sh_compare_op0);
929 if (sh_compare_op1 != const0_rtx
930 || code == GTU || code == GEU
931 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
932 sh_compare_op1 = force_reg (mode, sh_compare_op1);
933 }
934 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
935 {
936 from_compare (operands, GT);
937 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
938 }
939 else
940 insn = gen_rtx_SET (VOIDmode,
941 gen_rtx_REG (SImode, T_REG),
942 gen_rtx (code, SImode, sh_compare_op0,
943 sh_compare_op1));
944 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
945 {
946 insn = gen_rtx (PARALLEL, VOIDmode,
947 gen_rtvec (2, insn,
948 gen_rtx (USE, VOIDmode, get_fpscr_rtx ())));
949 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
950 }
951 else
952 emit_insn (insn);
953 }
954 \f
955 /* Functions to output assembly code. */
956
957 /* Return a sequence of instructions to perform a DI or DF move.
958
959 Since the SH cannot move a DI or DF in one instruction, we have
960 to take care when we see overlapping source and dest registers. */
961
962 const char *
963 output_movedouble (insn, operands, mode)
964 rtx insn ATTRIBUTE_UNUSED;
965 rtx operands[];
966 enum machine_mode mode;
967 {
968 rtx dst = operands[0];
969 rtx src = operands[1];
970
971 if (GET_CODE (dst) == MEM
972 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
973 return "mov.l %T1,%0\n\tmov.l %1,%0";
974
975 if (register_operand (dst, mode)
976 && register_operand (src, mode))
977 {
978 if (REGNO (src) == MACH_REG)
979 return "sts mach,%S0\n\tsts macl,%R0";
980
981 /* When mov.d r1,r2 do r2->r3 then r1->r2;
982 when mov.d r1,r0 do r1->r0 then r2->r1. */
983
984 if (REGNO (src) + 1 == REGNO (dst))
985 return "mov %T1,%T0\n\tmov %1,%0";
986 else
987 return "mov %1,%0\n\tmov %T1,%T0";
988 }
989 else if (GET_CODE (src) == CONST_INT)
990 {
991 if (INTVAL (src) < 0)
992 output_asm_insn ("mov #-1,%S0", operands);
993 else
994 output_asm_insn ("mov #0,%S0", operands);
995
996 return "mov %1,%R0";
997 }
998 else if (GET_CODE (src) == MEM)
999 {
1000 int ptrreg = -1;
1001 int dreg = REGNO (dst);
1002 rtx inside = XEXP (src, 0);
1003
1004 if (GET_CODE (inside) == REG)
1005 ptrreg = REGNO (inside);
1006 else if (GET_CODE (inside) == SUBREG)
1007 ptrreg = subreg_regno (inside);
1008 else if (GET_CODE (inside) == PLUS)
1009 {
1010 ptrreg = REGNO (XEXP (inside, 0));
1011 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1012 an offsettable address. Unfortunately, offsettable addresses use
1013 QImode to check the offset, and a QImode offsettable address
1014 requires r0 for the other operand, which is not currently
1015 supported, so we can't use the 'o' constraint.
1016 Thus we must check for and handle r0+REG addresses here.
1017 We punt for now, since this is likely very rare. */
1018 if (GET_CODE (XEXP (inside, 1)) == REG)
1019 abort ();
1020 }
1021 else if (GET_CODE (inside) == LABEL_REF)
1022 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1023 else if (GET_CODE (inside) == POST_INC)
1024 return "mov.l %1,%0\n\tmov.l %1,%T0";
1025 else
1026 abort ();
1027
1028 /* Work out the safe way to copy. Copy into the second half first. */
1029 if (dreg == ptrreg)
1030 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1031 }
1032
1033 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1034 }
1035
1036 /* Print an instruction which would have gone into a delay slot after
1037 another instruction, but couldn't because the other instruction expanded
1038 into a sequence where putting the slot insn at the end wouldn't work. */
1039
1040 static void
1041 print_slot (insn)
1042 rtx insn;
1043 {
1044 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1);
1045
1046 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1047 }
1048
1049 const char *
1050 output_far_jump (insn, op)
1051 rtx insn;
1052 rtx op;
1053 {
1054 struct { rtx lab, reg, op; } this;
1055 rtx braf_base_lab = NULL_RTX;
1056 const char *jump;
1057 int far;
1058 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1059 rtx prev;
1060
1061 this.lab = gen_label_rtx ();
1062
1063 if (TARGET_SH2
1064 && offset >= -32764
1065 && offset - get_attr_length (insn) <= 32766)
1066 {
1067 far = 0;
1068 jump = "mov.w %O0,%1; braf %1";
1069 }
1070 else
1071 {
1072 far = 1;
1073 if (flag_pic)
1074 {
1075 if (TARGET_SH2)
1076 jump = "mov.l %O0,%1; braf %1";
1077 else
1078 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1079 }
1080 else
1081 jump = "mov.l %O0,%1; jmp @%1";
1082 }
1083 /* If we have a scratch register available, use it. */
1084 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1085 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1086 {
1087 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1088 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1089 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1090 output_asm_insn (jump, &this.lab);
1091 if (dbr_sequence_length ())
1092 print_slot (final_sequence);
1093 else
1094 output_asm_insn ("nop", 0);
1095 }
1096 else
1097 {
1098 /* Output the delay slot insn first if any. */
1099 if (dbr_sequence_length ())
1100 print_slot (final_sequence);
1101
1102 this.reg = gen_rtx_REG (SImode, 13);
1103 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1104 Fortunately, MACL is fixed and call-clobbered, and we never
1105 need its value across jumps, so save r13 in it instead of in
1106 the stack. */
1107 if (TARGET_SH5)
1108 output_asm_insn ("lds r13, macl", 0);
1109 else
1110 output_asm_insn ("mov.l r13,@-r15", 0);
1111 output_asm_insn (jump, &this.lab);
1112 if (TARGET_SH5)
1113 output_asm_insn ("sts macl, r13", 0);
1114 else
1115 output_asm_insn ("mov.l @r15+,r13", 0);
1116 }
1117 if (far && flag_pic && TARGET_SH2)
1118 {
1119 braf_base_lab = gen_label_rtx ();
1120 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1121 CODE_LABEL_NUMBER (braf_base_lab));
1122 }
1123 if (far)
1124 output_asm_insn (".align 2", 0);
1125 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1126 this.op = op;
1127 if (far && flag_pic)
1128 {
1129 if (TARGET_SH2)
1130 this.lab = braf_base_lab;
1131 output_asm_insn (".long %O2-%O0", &this.lab);
1132 }
1133 else
1134 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1135 return "";
1136 }
1137
1138 /* Local label counter, used for constants in the pool and inside
1139 pattern branches. */
1140
1141 static int lf = 100;
1142
1143 /* Output code for ordinary branches. */
1144
1145 const char *
1146 output_branch (logic, insn, operands)
1147 int logic;
1148 rtx insn;
1149 rtx *operands;
1150 {
1151 switch (get_attr_length (insn))
1152 {
1153 case 6:
1154 /* This can happen if filling the delay slot has caused a forward
1155 branch to exceed its range (we could reverse it, but only
1156 when we know we won't overextend other branches; this should
1157 best be handled by relaxation).
1158 It can also happen when other condbranches hoist delay slot insn
1159 from their destination, thus leading to code size increase.
1160 But the branch will still be in the range -4092..+4098 bytes. */
1161
1162 if (! TARGET_RELAX)
1163 {
1164 int label = lf++;
1165 /* The call to print_slot will clobber the operands. */
1166 rtx op0 = operands[0];
1167
1168 /* If the instruction in the delay slot is annulled (true), then
1169 there is no delay slot where we can put it now. The only safe
1170 place for it is after the label. final will do that by default. */
1171
1172 if (final_sequence
1173 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1174 {
1175 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1176 ASSEMBLER_DIALECT ? "/" : ".", label);
1177 print_slot (final_sequence);
1178 }
1179 else
1180 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1181
1182 output_asm_insn ("bra\t%l0", &op0);
1183 fprintf (asm_out_file, "\tnop\n");
1184 (*targetm.asm_out.internal_label)(asm_out_file, "LF", label);
1185
1186 return "";
1187 }
1188 /* When relaxing, handle this like a short branch. The linker
1189 will fix it up if it still doesn't fit after relaxation. */
1190 case 2:
1191 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1192
1193 /* These are for SH2e, in which we have to account for the
1194 extra nop because of the hardware bug in annulled branches. */
1195 case 8:
1196 if (! TARGET_RELAX)
1197 {
1198 int label = lf++;
1199
1200 if (final_sequence
1201 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1202 abort ();
1203 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1204 logic ? "f" : "t",
1205 ASSEMBLER_DIALECT ? "/" : ".", label);
1206 fprintf (asm_out_file, "\tnop\n");
1207 output_asm_insn ("bra\t%l0", operands);
1208 fprintf (asm_out_file, "\tnop\n");
1209 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1210
1211 return "";
1212 }
1213 /* When relaxing, fall through. */
1214 case 4:
1215 {
1216 char buffer[10];
1217
1218 sprintf (buffer, "b%s%ss\t%%l0",
1219 logic ? "t" : "f",
1220 ASSEMBLER_DIALECT ? "/" : ".");
1221 output_asm_insn (buffer, &operands[0]);
1222 return "nop";
1223 }
1224
1225 default:
1226 /* There should be no longer branches now - that would
1227 indicate that something has destroyed the branches set
1228 up in machine_dependent_reorg. */
1229 abort ();
1230 }
1231 }
1232
1233 const char *
1234 output_branchy_insn (code, template, insn, operands)
1235 enum rtx_code code;
1236 const char *template;
1237 rtx insn;
1238 rtx *operands;
1239 {
1240 rtx next_insn = NEXT_INSN (insn);
1241
1242 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1243 {
1244 rtx src = SET_SRC (PATTERN (next_insn));
1245 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1246 {
1247 /* Following branch not taken */
1248 operands[9] = gen_label_rtx ();
1249 emit_label_after (operands[9], next_insn);
1250 INSN_ADDRESSES_NEW (operands[9],
1251 INSN_ADDRESSES (INSN_UID (next_insn))
1252 + get_attr_length (next_insn));
1253 return template;
1254 }
1255 else
1256 {
1257 int offset = (branch_dest (next_insn)
1258 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1259 if (offset >= -252 && offset <= 258)
1260 {
1261 if (GET_CODE (src) == IF_THEN_ELSE)
1262 /* branch_true */
1263 src = XEXP (src, 1);
1264 operands[9] = src;
1265 return template;
1266 }
1267 }
1268 }
1269 operands[9] = gen_label_rtx ();
1270 emit_label_after (operands[9], insn);
1271 INSN_ADDRESSES_NEW (operands[9],
1272 INSN_ADDRESSES (INSN_UID (insn))
1273 + get_attr_length (insn));
1274 return template;
1275 }
1276
1277 const char *
1278 output_ieee_ccmpeq (insn, operands)
1279 rtx insn, *operands;
1280 {
1281 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1282 }
1283 \f
1284 /* Output to FILE the start of the assembler file. */
1285
1286 void
1287 output_file_start (file)
1288 FILE *file;
1289 {
1290 output_file_directive (file, main_input_filename);
1291
1292 /* Switch to the data section so that the coffsem symbol
1293 isn't in the text section. */
1294 data_section ();
1295
1296 if (TARGET_LITTLE_ENDIAN)
1297 fprintf (file, "\t.little\n");
1298
1299 if (TARGET_SHCOMPACT)
1300 fprintf (file, "\t.mode\tSHcompact\n");
1301 else if (TARGET_SHMEDIA)
1302 fprintf (file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1303 TARGET_SHMEDIA64 ? 64 : 32);
1304 }
1305 \f
1306 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1307
1308 static bool
1309 unspec_caller_rtx_p (pat)
1310 rtx pat;
1311 {
1312 switch (GET_CODE (pat))
1313 {
1314 case CONST:
1315 return unspec_caller_rtx_p (XEXP (pat, 0));
1316 case PLUS:
1317 case MINUS:
1318 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1319 return true;
1320 return unspec_caller_rtx_p (XEXP (pat, 1));
1321 case UNSPEC:
1322 if (XINT (pat, 1) == UNSPEC_CALLER)
1323 return true;
1324 default:
1325 break;
1326 }
1327
1328 return false;
1329 }
1330
1331 /* Indicate that INSN cannot be duplicated.  This is true for insns
1332 that generate a unique label. */
1333
1334 static bool
1335 sh_cannot_copy_insn_p (insn)
1336 rtx insn;
1337 {
1338 rtx pat;
1339
1340 if (!reload_completed || !flag_pic)
1341 return false;
1342
1343 if (GET_CODE (insn) != INSN)
1344 return false;
1345 if (asm_noperands (insn) >= 0)
1346 return false;
1347
1348 pat = PATTERN (insn);
1349 if (GET_CODE (pat) != SET)
1350 return false;
1351 pat = SET_SRC (pat);
1352
1353 if (unspec_caller_rtx_p (pat))
1354 return true;
1355
1356 return false;
1357 }
1358 \f
1359 /* Actual number of instructions used to make a shift by N. */
1360 static const char ashiftrt_insns[] =
1361 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1362
1363 /* Left shift and logical right shift are the same. */
1364 static const char shift_insns[] =
1365 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1366
1367 /* Individual shift amounts needed to get the above length sequences.
1368 One bit right shifts clobber the T bit, so when possible, put one bit
1369 shifts in the middle of the sequence, so the ends are eligible for
1370 branch delay slots. */
1371 static const short shift_amounts[32][5] = {
1372 {0}, {1}, {2}, {2, 1},
1373 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1374 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1375 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1376 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1377 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1378 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1379 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
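/* For example, a logical right shift by 5 takes shift_insns[5] == 3 insns
   using shift_amounts[5] == {2, 1, 2}, i.e. shlr2; shlr; shlr2.  The
   single-bit shift (which clobbers T) sits in the middle, leaving the
   first and last insns eligible for branch delay slots. */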
1380
1381 /* Likewise, but for shift amounts < 16, up to three highmost bits
1382 might be clobbered. This is typically used when combined with some
1383 kind of sign or zero extension. */
1384
1385 static const char ext_shift_insns[] =
1386 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1387
1388 static const short ext_shift_amounts[32][4] = {
1389 {0}, {1}, {2}, {2, 1},
1390 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1391 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1392 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1393 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1394 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1395 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1396 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1397
1398 /* Assuming we have a value that has been sign-extended by at least one bit,
1399 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1400 to shift it by N without data loss, and quicker than by other means? */
1401 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
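/* That is, EXT_SHIFT_SIGNED (N) holds exactly for N == 7 and N == 15. */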
1402
1403 /* This is used in length attributes in sh.md to help compute the length
1404 of arbitrary constant shift instructions. */
1405
1406 int
1407 shift_insns_rtx (insn)
1408 rtx insn;
1409 {
1410 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1411 int shift_count = INTVAL (XEXP (set_src, 1));
1412 enum rtx_code shift_code = GET_CODE (set_src);
1413
1414 switch (shift_code)
1415 {
1416 case ASHIFTRT:
1417 return ashiftrt_insns[shift_count];
1418 case LSHIFTRT:
1419 case ASHIFT:
1420 return shift_insns[shift_count];
1421 default:
1422 abort();
1423 }
1424 }
1425
1426 /* Return the cost of a shift. */
1427
1428 static inline int
1429 shiftcosts (x)
1430 rtx x;
1431 {
1432 int value;
1433
1434 if (TARGET_SHMEDIA)
1435 return 1;
1436
1437 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1438 {
1439 if (GET_MODE (x) == DImode
1440 && GET_CODE (XEXP (x, 1)) == CONST_INT
1441 && INTVAL (XEXP (x, 1)) == 1)
1442 return 2;
1443
1444 /* Everything else is invalid, because there is no pattern for it. */
1445 return 10000;
1446 }
1447 /* If shifting by a non-constant, this will be expensive. */
1448 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1449 return SH_DYNAMIC_SHIFT_COST;
1450
1451 value = INTVAL (XEXP (x, 1));
1452
1453 /* Otherwise, return the true cost in instructions. */
1454 if (GET_CODE (x) == ASHIFTRT)
1455 {
1456 int cost = ashiftrt_insns[value];
1457 /* If SH3, then we put the constant in a reg and use shad. */
1458 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1459 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1460 return cost;
1461 }
1462 else
1463 return shift_insns[value];
1464 }
1465
1466 /* Return the cost of an AND operation. */
1467
1468 static inline int
1469 andcosts (x)
1470 rtx x;
1471 {
1472 int i;
1473
1474 /* Anding with a register is a single-cycle `and' instruction. */
1475 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1476 return 1;
1477
1478 i = INTVAL (XEXP (x, 1));
1479
1480 if (TARGET_SHMEDIA)
1481 {
1482 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1483 && CONST_OK_FOR_J (INTVAL (XEXP (x, 1))))
1484 || EXTRA_CONSTRAINT_S (XEXP (x, 1)))
1485 return 1;
1486 else
1487 return 2;
1488 }
1489
1490 /* These constants are single cycle extu.[bw] instructions. */
1491 if (i == 0xff || i == 0xffff)
1492 return 1;
1493 /* Constants that can be used in an and immediate instruction take a single
1494 cycle, but they require r0, so make them a little more expensive. */
1495 if (CONST_OK_FOR_L (i))
1496 return 2;
1497 /* Constants that can be loaded with a mov immediate and an and.
1498 This case is probably unnecessary. */
1499 if (CONST_OK_FOR_I (i))
1500 return 2;
1501 /* Any other constant requires a 2 cycle pc-relative load plus an and.
1502 This case is probably unnecessary. */
1503 return 3;
1504 }
1505
1506 /* Return the cost of an addition or a subtraction. */
1507
1508 static inline int
1509 addsubcosts (x)
1510 rtx x;
1511 {
1512 /* Adding a register is a single cycle insn. */
1513 if (GET_CODE (XEXP (x, 1)) == REG
1514 || GET_CODE (XEXP (x, 1)) == SUBREG)
1515 return 1;
1516
1517 /* Likewise for small constants. */
1518 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1519 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1520 return 1;
1521
1522 if (TARGET_SHMEDIA)
1523 switch (GET_CODE (XEXP (x, 1)))
1524 {
1525 case CONST:
1526 case LABEL_REF:
1527 case SYMBOL_REF:
1528 return TARGET_SHMEDIA64 ? 5 : 3;
1529
1530 case CONST_INT:
1531 if (CONST_OK_FOR_J (INTVAL (XEXP (x, 1))))
1532 return 2;
1533 else if (CONST_OK_FOR_J (INTVAL (XEXP (x, 1)) >> 16))
1534 return 3;
1535 else if (CONST_OK_FOR_J ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1536 return 4;
1537
1538 /* Fall through. */
1539 default:
1540 return 5;
1541 }
1542
1543 /* Any other constant requires a 2 cycle pc-relative load plus an
1544 addition. */
1545 return 3;
1546 }
1547
1548 /* Return the cost of a multiply. */
1549 static inline int
1550 multcosts (x)
1551 rtx x ATTRIBUTE_UNUSED;
1552 {
1553 if (TARGET_SHMEDIA)
1554 return 3;
1555
1556 if (TARGET_SH2)
1557 {
1558 /* We have a mul insn, so we can never take more than the mul and the
1559 read of the mac reg, but count more because of the latency and extra
1560 reg usage. */
1561 if (TARGET_SMALLCODE)
1562 return 2;
1563 return 3;
1564 }
1565
1566 /* If we're aiming at small code, then just count the number of
1567 insns in a multiply call sequence. */
1568 if (TARGET_SMALLCODE)
1569 return 5;
1570
1571 /* Otherwise count all the insns in the routine we'd be calling too. */
1572 return 20;
1573 }
1574
1575 /* Compute a (partial) cost for rtx X. Return true if the complete
1576 cost has been computed, and false if subexpressions should be
1577 scanned. In either case, *TOTAL contains the cost result. */
1578
1579 static bool
1580 sh_rtx_costs (x, code, outer_code, total)
1581 rtx x;
1582 int code, outer_code, *total;
1583 {
1584 switch (code)
1585 {
1586 case CONST_INT:
1587 if (TARGET_SHMEDIA)
1588 {
1589 if (INTVAL (x) == 0)
1590 *total = 0;
1591 else if (outer_code == AND && and_operand ((x), DImode))
1592 *total = 0;
1593 else if ((outer_code == IOR || outer_code == XOR
1594 || outer_code == PLUS)
1595 && CONST_OK_FOR_P (INTVAL (x)))
1596 *total = 0;
1597 else if (CONST_OK_FOR_J (INTVAL (x)))
1598 *total = COSTS_N_INSNS (outer_code != SET);
1599 else if (CONST_OK_FOR_J (INTVAL (x) >> 16))
1600 *total = COSTS_N_INSNS (2);
1601 else if (CONST_OK_FOR_J ((INTVAL (x) >> 16) >> 16))
1602 *total = COSTS_N_INSNS (3);
1603 else
1604 *total = COSTS_N_INSNS (4);
1605 return true;
1606 }
1607 if (CONST_OK_FOR_I (INTVAL (x)))
1608 *total = 0;
1609 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
1610 && CONST_OK_FOR_L (INTVAL (x)))
1611 *total = 1;
1612 else
1613 *total = 8;
1614 return true;
1615
1616 case CONST:
1617 case LABEL_REF:
1618 case SYMBOL_REF:
1619 if (TARGET_SHMEDIA64)
1620 *total = COSTS_N_INSNS (4);
1621 else if (TARGET_SHMEDIA32)
1622 *total = COSTS_N_INSNS (2);
1623 else
1624 *total = 5;
1625 return true;
1626
1627 case CONST_DOUBLE:
1628 if (TARGET_SHMEDIA)
1629 *total = COSTS_N_INSNS (4);
1630 else
1631 *total = 10;
1632 return true;
1633
1634 case PLUS:
1635 *total = COSTS_N_INSNS (addsubcosts (x));
1636 return true;
1637
1638 case AND:
1639 *total = COSTS_N_INSNS (andcosts (x));
1640 return true;
1641
1642 case MULT:
1643 *total = COSTS_N_INSNS (multcosts (x));
1644 return true;
1645
1646 case ASHIFT:
1647 case ASHIFTRT:
1648 case LSHIFTRT:
1649 *total = COSTS_N_INSNS (shiftcosts (x));
1650 return true;
1651
1652 case DIV:
1653 case UDIV:
1654 case MOD:
1655 case UMOD:
1656 *total = COSTS_N_INSNS (20);
1657 return true;
1658
1659 case FLOAT:
1660 case FIX:
1661 *total = 100;
1662 return true;
1663
1664 default:
1665 return false;
1666 }
1667 }
1668
1669 /* Compute the cost of an address. For the SH, all valid addresses are
1670 the same cost. Use a slightly higher cost for reg + reg addressing,
1671 since it increases pressure on r0. */
1672
1673 static int
1674 sh_address_cost (X)
1675 rtx X;
1676 {
1677 return (GET_CODE (X) == PLUS
1678 && ! CONSTANT_P (XEXP (X, 1))
1679 && ! TARGET_SHMEDIA ? 1 : 0);
1680 }
1681
1682 /* Code to expand a shift. */
1683
1684 void
1685 gen_ashift (type, n, reg)
1686 int type;
1687 int n;
1688 rtx reg;
1689 {
1690 /* Negative values here come from the shift_amounts array. */
1691 if (n < 0)
1692 {
1693 if (type == ASHIFT)
1694 type = LSHIFTRT;
1695 else
1696 type = ASHIFT;
1697 n = -n;
1698 }
1699
1700 switch (type)
1701 {
1702 case ASHIFTRT:
1703 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1704 break;
1705 case LSHIFTRT:
1706 if (n == 1)
1707 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1708 else
1709 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1710 break;
1711 case ASHIFT:
1712 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1713 break;
1714 }
1715 }
1716
1717 /* Same for HImode */
1718
1719 void
1720 gen_ashift_hi (type, n, reg)
1721 int type;
1722 int n;
1723 rtx reg;
1724 {
1725 /* Negative values here come from the shift_amounts array. */
1726 if (n < 0)
1727 {
1728 if (type == ASHIFT)
1729 type = LSHIFTRT;
1730 else
1731 type = ASHIFT;
1732 n = -n;
1733 }
1734
1735 switch (type)
1736 {
1737 case ASHIFTRT:
1738 case LSHIFTRT:
1739 /* We don't have HImode right shift operations because using the
1740 ordinary 32 bit shift instructions for that doesn't generate proper
1741 zero/sign extension.
1742 gen_ashift_hi is only called in contexts where we know that the
1743 sign extension works out correctly. */
1744 {
1745 int offset = 0;
1746 if (GET_CODE (reg) == SUBREG)
1747 {
1748 offset = SUBREG_BYTE (reg);
1749 reg = SUBREG_REG (reg);
1750 }
1751 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1752 break;
1753 }
1754 case ASHIFT:
1755 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1756 break;
1757 }
1758 }
1759
1760 /* Output RTL to split a constant shift into its component SH constant
1761 shift instructions. */
1762
1763 void
1764 gen_shifty_op (code, operands)
1765 int code;
1766 rtx *operands;
1767 {
1768 int value = INTVAL (operands[2]);
1769 int max, i;
1770
1771 /* Truncate the shift count in case it is out of bounds. */
1772 value = value & 0x1f;
1773
1774 if (value == 31)
1775 {
1776 if (code == LSHIFTRT)
1777 {
1778 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1779 emit_insn (gen_movt (operands[0]));
1780 return;
1781 }
1782 else if (code == ASHIFT)
1783 {
1784 /* There is a two instruction sequence for 31 bit left shifts,
1785 but it requires r0. */
1786 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
1787 {
1788 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
1789 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
1790 return;
1791 }
1792 }
1793 }
1794 else if (value == 0)
1795 {
1796 /* This can happen when not optimizing. We must output something here
1797 to prevent the compiler from aborting in final.c after the try_split
1798 call. */
1799 emit_insn (gen_nop ());
1800 return;
1801 }
1802
1803 max = shift_insns[value];
1804 for (i = 0; i < max; i++)
1805 gen_ashift (code, shift_amounts[value][i], operands[0]);
1806 }
1807
1808 /* Same as above, but optimized for values where the topmost bits don't
1809 matter. */
1810
1811 void
1812 gen_shifty_hi_op (code, operands)
1813 int code;
1814 rtx *operands;
1815 {
1816 int value = INTVAL (operands[2]);
1817 int max, i;
1818 void (*gen_fun) PARAMS ((int, int, rtx));
1819
1820 /* This operation is used by and_shl for SImode values with a few
1821 high bits known to be cleared. */
1822 value &= 31;
1823 if (value == 0)
1824 {
1825 emit_insn (gen_nop ());
1826 return;
1827 }
1828
1829 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
1830 if (code == ASHIFT)
1831 {
1832 max = ext_shift_insns[value];
1833 for (i = 0; i < max; i++)
1834 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1835 }
1836 else
1837 /* When shifting right, emit the shifts in reverse order, so that
1838 solitary negative values come first. */
1839 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
1840 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1841 }
1842
1843 /* Output RTL for an arithmetic right shift. */
1844
1845 /* ??? Rewrite to use super-optimizer sequences. */
1846
1847 int
1848 expand_ashiftrt (operands)
1849 rtx *operands;
1850 {
1851 rtx sym;
1852 rtx wrk;
1853 char func[18];
1854 tree func_name;
1855 int value;
1856
1857 if (TARGET_SH3)
1858 {
1859 if (GET_CODE (operands[2]) != CONST_INT)
1860 {
1861 rtx count = copy_to_mode_reg (SImode, operands[2]);
1862 emit_insn (gen_negsi2 (count, count));
1863 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1864 return 1;
1865 }
1866 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
1867 > 1 + SH_DYNAMIC_SHIFT_COST)
1868 {
1869 rtx count
1870 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
1871 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1872 return 1;
1873 }
1874 }
1875 if (GET_CODE (operands[2]) != CONST_INT)
1876 return 0;
1877
1878 value = INTVAL (operands[2]) & 31;
1879
1880 if (value == 31)
1881 {
1882 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
1883 return 1;
1884 }
1885 else if (value >= 16 && value <= 19)
1886 {
1887 wrk = gen_reg_rtx (SImode);
1888 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
1889 value -= 16;
1890 while (value--)
1891 gen_ashift (ASHIFTRT, 1, wrk);
1892 emit_move_insn (operands[0], wrk);
1893 return 1;
1894 }
1895 /* Expand a short sequence inline; for anything longer, call a magic routine. */
1896 else if (value <= 5)
1897 {
1898 wrk = gen_reg_rtx (SImode);
1899 emit_move_insn (wrk, operands[1]);
1900 while (value--)
1901 gen_ashift (ASHIFTRT, 1, wrk);
1902 emit_move_insn (operands[0], wrk);
1903 return 1;
1904 }
1905
1906 wrk = gen_reg_rtx (Pmode);
1907
1908 /* Load the value into an arg reg and call a helper. */
1909 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
1910 sprintf (func, "__ashiftrt_r4_%d", value);
1911 func_name = get_identifier (func);
1912 sym = function_symbol (IDENTIFIER_POINTER (func_name));
1913 emit_move_insn (wrk, sym);
1914 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
1915 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
1916 return 1;
1917 }
1918
1919 int
1920 sh_dynamicalize_shift_p (count)
1921 rtx count;
1922 {
1923 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
1924 }
1925
1926 /* Try to find a good way to implement the combiner pattern
1927 [(set (match_operand:SI 0 "register_operand" "r")
1928 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1929 (match_operand:SI 2 "const_int_operand" "n"))
1930 (match_operand:SI 3 "const_int_operand" "n"))) .
1931 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
1932 return 0 for simple right / left or left/right shift combination.
1933 return 1 for a combination of shifts with zero_extend.
1934 return 2 for a combination of shifts with an AND that needs r0.
1935 return 3 for a combination of shifts with an AND that needs an extra
1936 scratch register, when the three highmost bits of the AND mask are clear.
1937 return 4 for a combination of shifts with an AND that needs an extra
1938 scratch register, when any of the three highmost bits of the AND mask
1939 is set.
1940 If ATTRP is set, store an initial right shift width in ATTRP[0],
1941 and the instruction length in ATTRP[1] . These values are not valid
1942 when returning 0.
1943 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
1944 shift_amounts for the last shift value that is to be used before the
1945 sign extend. */
1946 int
1947 shl_and_kind (left_rtx, mask_rtx, attrp)
1948 rtx left_rtx, mask_rtx;
1949 int *attrp;
1950 {
1951 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
1952 int left = INTVAL (left_rtx), right;
1953 int best = 0;
1954 int cost, best_cost = 10000;
1955 int best_right = 0, best_len = 0;
1956 int i;
1957 int can_ext;
1958
1959 if (left < 0 || left > 31)
1960 return 0;
1961 if (GET_CODE (mask_rtx) == CONST_INT)
1962 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
1963 else
1964 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
1965 /* Can this be expressed as a right shift / left shift pair ? */
1966 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
1967 right = exact_log2 (lsb);
1968 mask2 = ~(mask + lsb - 1);
1969 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
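/* Illustrative example: with mask == 0x1fe we get lsb == 2, right == 1,
mask2 == ~0x1ff and lsb2 == 0x200; since mask2 == ~(lsb2 - 1), the mask
is one contiguous field covering bits 1..8.  */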
1970 /* mask has no zeroes other than trailing zeroes <==> ! mask2 */
1971 if (! mask2)
1972 best_cost = shift_insns[right] + shift_insns[right + left];
1973 /* mask has no trailing zeroes <==> ! right */
1974 else if (! right && mask2 == ~(lsb2 - 1))
1975 {
1976 int late_right = exact_log2 (lsb2);
1977 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
1978 }
1979 /* Try to use zero extend */
1980 if (mask2 == ~(lsb2 - 1))
1981 {
1982 int width, first;
1983
1984 for (width = 8; width <= 16; width += 8)
1985 {
1986 /* Can we zero-extend right away? */
1987 if (lsb2 == (unsigned HOST_WIDE_INT)1 << width)
1988 {
1989 cost
1990 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
1991 if (cost < best_cost)
1992 {
1993 best = 1;
1994 best_cost = cost;
1995 best_right = right;
1996 best_len = cost;
1997 if (attrp)
1998 attrp[2] = -1;
1999 }
2000 continue;
2001 }
2002 /* ??? Could try to put zero extend into initial right shift,
2003 or even shift a bit left before the right shift. */
2004 /* Determine value of first part of left shift, to get to the
2005 zero extend cut-off point. */
2006 first = width - exact_log2 (lsb2) + right;
2007 if (first >= 0 && right + left - first >= 0)
2008 {
2009 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2010 + ext_shift_insns[right + left - first];
2011 if (cost < best_cost)
2012 {
2013 best = 1;
2014 best_cost = cost;
2015 best_right = right;
2016 best_len = cost;
2017 if (attrp)
2018 attrp[2] = first;
2019 }
2020 }
2021 }
2022 }
2023 /* Try to use r0 AND pattern */
2024 for (i = 0; i <= 2; i++)
2025 {
2026 if (i > right)
2027 break;
2028 if (! CONST_OK_FOR_L (mask >> i))
2029 continue;
2030 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2031 if (cost < best_cost)
2032 {
2033 best = 2;
2034 best_cost = cost;
2035 best_right = i;
2036 best_len = cost - 1;
2037 }
2038 }
2039 /* Try to use a scratch register to hold the AND operand. */
2040 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT)3 << 30)) == 0;
2041 for (i = 0; i <= 2; i++)
2042 {
2043 if (i > right)
2044 break;
2045 cost = (i != 0) + (CONST_OK_FOR_I (mask >> i) ? 2 : 3)
2046 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2047 if (cost < best_cost)
2048 {
2049 best = 4 - can_ext;
2050 best_cost = cost;
2051 best_right = i;
2052 best_len = cost - 1 - ! CONST_OK_FOR_I (mask >> i);
2053 }
2054 }
2055
2056 if (attrp)
2057 {
2058 attrp[0] = best_right;
2059 attrp[1] = best_len;
2060 }
2061 return best;
2062 }
2063
2064 /* This is used in length attributes of the unnamed instructions
2065 corresponding to shl_and_kind return values of 1 and 2. */
2066 int
2067 shl_and_length (insn)
2068 rtx insn;
2069 {
2070 rtx set_src, left_rtx, mask_rtx;
2071 int attributes[3];
2072
2073 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2074 left_rtx = XEXP (XEXP (set_src, 0), 1);
2075 mask_rtx = XEXP (set_src, 1);
2076 shl_and_kind (left_rtx, mask_rtx, attributes);
2077 return attributes[1];
2078 }
2079
2080 /* This is used in length attribute of the and_shl_scratch instruction. */
2081
2082 int
2083 shl_and_scr_length (insn)
2084 rtx insn;
2085 {
2086 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2087 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2088 rtx op = XEXP (set_src, 0);
2089 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2090 op = XEXP (XEXP (op, 0), 0);
2091 return len + shift_insns[INTVAL (XEXP (op, 1))];
2092 }
2093
2094 /* Nonzero while the initial rtl for the current function is being generated. */
2095 extern int rtx_equal_function_value_matters;
2096
2097 /* Generate rtl for instructions for which shl_and_kind advised a particular
2098 method of generating them, i.e. returned nonzero. */
2099
2100 int
2101 gen_shl_and (dest, left_rtx, mask_rtx, source)
2102 rtx dest, left_rtx, mask_rtx, source;
2103 {
2104 int attributes[3];
2105 unsigned HOST_WIDE_INT mask;
2106 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2107 int right, total_shift;
2108 void (*shift_gen_fun) PARAMS ((int, rtx*)) = gen_shifty_hi_op;
2109
2110 right = attributes[0];
2111 total_shift = INTVAL (left_rtx) + right;
2112 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2113 switch (kind)
2114 {
2115 default:
2116 return -1;
2117 case 1:
2118 {
2119 int first = attributes[2];
2120 rtx operands[3];
2121
2122 if (first < 0)
2123 {
2124 emit_insn ((mask << right) <= 0xff
2125 ? gen_zero_extendqisi2(dest,
2126 gen_lowpart (QImode, source))
2127 : gen_zero_extendhisi2(dest,
2128 gen_lowpart (HImode, source)));
2129 source = dest;
2130 }
2131 if (source != dest)
2132 emit_insn (gen_movsi (dest, source));
2133 operands[0] = dest;
2134 if (right)
2135 {
2136 operands[2] = GEN_INT (right);
2137 gen_shifty_hi_op (LSHIFTRT, operands);
2138 }
2139 if (first > 0)
2140 {
2141 operands[2] = GEN_INT (first);
2142 gen_shifty_hi_op (ASHIFT, operands);
2143 total_shift -= first;
2144 mask <<= first;
2145 }
2146 if (first >= 0)
2147 emit_insn (mask <= 0xff
2148 ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest))
2149 : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest)));
2150 if (total_shift > 0)
2151 {
2152 operands[2] = GEN_INT (total_shift);
2153 gen_shifty_hi_op (ASHIFT, operands);
2154 }
2155 break;
2156 }
2157 case 4:
2158 shift_gen_fun = gen_shifty_op;
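/* Fall through.  */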
2159 case 3:
2160 /* If the topmost bit that matters is set, set the topmost bits
2161 that don't matter. This way, we might be able to get a shorter
2162 signed constant. */
2163 if (mask & ((HOST_WIDE_INT)1 << (31 - total_shift)))
2164 mask |= (HOST_WIDE_INT)~0 << (31 - total_shift);
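/* Fall through.  */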
2165 case 2:
2166 /* Don't expand fine-grained when combining, because that will
2167 make the pattern fail. */
2168 if (rtx_equal_function_value_matters
2169 || reload_in_progress || reload_completed)
2170 {
2171 rtx operands[3];
2172
2173 /* Cases 3 and 4 should be handled by this split
2174 only while combining */
2175 if (kind > 2)
2176 abort ();
2177 if (right)
2178 {
2179 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2180 source = dest;
2181 }
2182 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2183 if (total_shift)
2184 {
2185 operands[0] = dest;
2186 operands[1] = dest;
2187 operands[2] = GEN_INT (total_shift);
2188 shift_gen_fun (ASHIFT, operands);
2189 }
2190 break;
2191 }
2192 else
2193 {
2194 int neg = 0;
2195 if (kind != 4 && total_shift < 16)
2196 {
2197 neg = -ext_shift_amounts[total_shift][1];
2198 if (neg > 0)
2199 neg -= ext_shift_amounts[total_shift][2];
2200 else
2201 neg = 0;
2202 }
2203 emit_insn (gen_and_shl_scratch (dest, source,
2204 GEN_INT (right),
2205 GEN_INT (mask),
2206 GEN_INT (total_shift + neg),
2207 GEN_INT (neg)));
2208 emit_insn (gen_movsi (dest, dest));
2209 break;
2210 }
2211 }
2212 return 0;
2213 }
2214
2215 /* Try to find a good way to implement the combiner pattern
2216 [(set (match_operand:SI 0 "register_operand" "=r")
2217 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2218 (match_operand:SI 2 "const_int_operand" "n")
2219 (match_operand:SI 3 "const_int_operand" "n")
2220 (const_int 0)))
2221 (clobber (reg:SI T_REG))]
2222 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2223 return 0 for simple left / right shift combination.
2224 return 1 for left shift / 8 bit sign extend / left shift.
2225 return 2 for left shift / 16 bit sign extend / left shift.
2226 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2227 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2228 return 5 for left shift / 16 bit sign extend / right shift
2229 return 6 for < 8 bit sign extend / left shift.
2230 return 7 for < 8 bit sign extend / left shift / single right shift.
2231 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2232
2233 int
2234 shl_sext_kind (left_rtx, size_rtx, costp)
2235 rtx left_rtx, size_rtx;
2236 int *costp;
2237 {
2238 int left, size, insize, ext;
2239 int cost = 0, best_cost;
2240 int kind;
2241
2242 left = INTVAL (left_rtx);
2243 size = INTVAL (size_rtx);
2244 insize = size - left;
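/* INSIZE is the number of low bits of the source that actually matter:
the low SIZE bits of (source << LEFT) consist of the low INSIZE bits of
the source shifted up by LEFT.  */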
2245 if (insize <= 0)
2246 abort ();
2247 /* Default to left / right shift. */
2248 kind = 0;
2249 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2250 if (size <= 16)
2251 {
2252 /* 16 bit shift / sign extend / 16 bit shift */
2253 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2254 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2255 below, by alternative 3 or something even better. */
2256 if (cost < best_cost)
2257 {
2258 kind = 5;
2259 best_cost = cost;
2260 }
2261 }
2262 /* Try a plain sign extend between two shifts. */
2263 for (ext = 16; ext >= insize; ext -= 8)
2264 {
2265 if (ext <= size)
2266 {
2267 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2268 if (cost < best_cost)
2269 {
2270 kind = ext / (unsigned) 8;
2271 best_cost = cost;
2272 }
2273 }
2274 /* Check if we can do a sloppy shift with a final signed shift
2275 restoring the sign. */
2276 if (EXT_SHIFT_SIGNED (size - ext))
2277 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2278 /* If not, maybe it's still cheaper to do the second shift sloppy,
2279 and do a final sign extend? */
2280 else if (size <= 16)
2281 cost = ext_shift_insns[ext - insize] + 1
2282 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2283 else
2284 continue;
2285 if (cost < best_cost)
2286 {
2287 kind = ext / (unsigned) 8 + 2;
2288 best_cost = cost;
2289 }
2290 }
2291 /* Check if we can sign extend in r0 */
2292 if (insize < 8)
2293 {
2294 cost = 3 + shift_insns[left];
2295 if (cost < best_cost)
2296 {
2297 kind = 6;
2298 best_cost = cost;
2299 }
2300 /* Try the same with a final signed shift. */
2301 if (left < 31)
2302 {
2303 cost = 3 + ext_shift_insns[left + 1] + 1;
2304 if (cost < best_cost)
2305 {
2306 kind = 7;
2307 best_cost = cost;
2308 }
2309 }
2310 }
2311 if (TARGET_SH3)
2312 {
2313 /* Try to use a dynamic shift. */
2314 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2315 if (cost < best_cost)
2316 {
2317 kind = 0;
2318 best_cost = cost;
2319 }
2320 }
2321 if (costp)
2322 *costp = cost;
2323 return kind;
2324 }
2325
2326 /* Function to be used in the length attribute of the instructions
2327 implementing this pattern. */
2328
2329 int
2330 shl_sext_length (insn)
2331 rtx insn;
2332 {
2333 rtx set_src, left_rtx, size_rtx;
2334 int cost;
2335
2336 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2337 left_rtx = XEXP (XEXP (set_src, 0), 1);
2338 size_rtx = XEXP (set_src, 1);
2339 shl_sext_kind (left_rtx, size_rtx, &cost);
2340 return cost;
2341 }
2342
2343 /* Generate rtl for this pattern */
2344
2345 int
2346 gen_shl_sext (dest, left_rtx, size_rtx, source)
2347 rtx dest, left_rtx, size_rtx, source;
2348 {
2349 int kind;
2350 int left, size, insize, cost;
2351 rtx operands[3];
2352
2353 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2354 left = INTVAL (left_rtx);
2355 size = INTVAL (size_rtx);
2356 insize = size - left;
2357 switch (kind)
2358 {
2359 case 1:
2360 case 2:
2361 case 3:
2362 case 4:
2363 {
2364 int ext = kind & 1 ? 8 : 16;
2365 int shift2 = size - ext;
2366
2367 /* Don't expand fine-grained when combining, because that will
2368 make the pattern fail. */
2369 if (! rtx_equal_function_value_matters
2370 && ! reload_in_progress && ! reload_completed)
2371 {
2372 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2373 emit_insn (gen_movsi (dest, source));
2374 break;
2375 }
2376 if (dest != source)
2377 emit_insn (gen_movsi (dest, source));
2378 operands[0] = dest;
2379 if (ext - insize)
2380 {
2381 operands[2] = GEN_INT (ext - insize);
2382 gen_shifty_hi_op (ASHIFT, operands);
2383 }
2384 emit_insn (kind & 1
2385 ? gen_extendqisi2(dest, gen_lowpart (QImode, dest))
2386 : gen_extendhisi2(dest, gen_lowpart (HImode, dest)));
2387 if (kind <= 2)
2388 {
2389 if (shift2)
2390 {
2391 operands[2] = GEN_INT (shift2);
2392 gen_shifty_op (ASHIFT, operands);
2393 }
2394 }
2395 else
2396 {
2397 if (shift2 > 0)
2398 {
2399 if (EXT_SHIFT_SIGNED (shift2))
2400 {
2401 operands[2] = GEN_INT (shift2 + 1);
2402 gen_shifty_op (ASHIFT, operands);
2403 operands[2] = GEN_INT (1);
2404 gen_shifty_op (ASHIFTRT, operands);
2405 break;
2406 }
2407 operands[2] = GEN_INT (shift2);
2408 gen_shifty_hi_op (ASHIFT, operands);
2409 }
2410 else if (shift2)
2411 {
2412 operands[2] = GEN_INT (-shift2);
2413 gen_shifty_hi_op (LSHIFTRT, operands);
2414 }
2415 emit_insn (size <= 8
2416 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2417 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2418 }
2419 break;
2420 }
2421 case 5:
2422 {
2423 int i = 16 - size;
2424 if (! rtx_equal_function_value_matters
2425 && ! reload_in_progress && ! reload_completed)
2426 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2427 else
2428 {
2429 operands[0] = dest;
2430 operands[2] = GEN_INT (16 - insize);
2431 gen_shifty_hi_op (ASHIFT, operands);
2432 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2433 }
2434 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2435 while (--i >= 0)
2436 gen_ashift (ASHIFTRT, 1, dest);
2437 break;
2438 }
2439 case 6:
2440 case 7:
2441 /* Don't expand fine-grained when combining, because that will
2442 make the pattern fail. */
2443 if (! rtx_equal_function_value_matters
2444 && ! reload_in_progress && ! reload_completed)
2445 {
2446 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2447 emit_insn (gen_movsi (dest, source));
2448 break;
2449 }
2450 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2451 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2452 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2453 operands[0] = dest;
2454 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2455 gen_shifty_op (ASHIFT, operands);
2456 if (kind == 7)
2457 emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1)));
2458 break;
2459 default:
2460 return -1;
2461 }
2462 return 0;
2463 }
2464
2465 /* Prefix a symbol_ref name with "datalabel". */
2466
2467 rtx
2468 gen_datalabel_ref (sym)
2469 rtx sym;
2470 {
2471 if (GET_CODE (sym) == LABEL_REF)
2472 return gen_rtx_CONST (GET_MODE (sym),
2473 gen_rtx_UNSPEC (GET_MODE (sym),
2474 gen_rtvec (1, sym),
2475 UNSPEC_DATALABEL));
2476
2477 if (GET_CODE (sym) != SYMBOL_REF)
2478 abort ();
2479
2480 return sym;
2481 }
2482
2483 \f
2484 /* The SH cannot load a large constant into a register; constants have to
2485 come from a pc relative load. The reference of a pc relative load
2486 instruction must be less than 1k in front of the instruction. This
2487 means that we often have to dump a constant inside a function, and
2488 generate code to branch around it.
2489
2490 It is important to minimize this, since the branches will slow things
2491 down and make things bigger.
2492
2493 Worst case code looks like:
2494
2495 mov.l L1,rn
2496 bra L2
2497 nop
2498 align
2499 L1: .long value
2500 L2:
2501 ..
2502
2503 mov.l L3,rn
2504 bra L4
2505 nop
2506 align
2507 L3: .long value
2508 L4:
2509 ..
2510
2511 We fix this by performing a scan before scheduling, which notices which
2512 instructions need to have their operands fetched from the constant table
2513 and builds the table.
2514
2515 The algorithm is:
2516
2517 Scan to find an instruction which needs a pcrel move. Look forward, find the
2518 last barrier which is within MAX_COUNT bytes of the requirement.
2519 If there isn't one, make one. Process all the instructions between
2520 that instruction and the barrier.
2521
2522 In the above example, we can tell that L3 is within 1k of L1, so
2523 the first move can be shrunk from the 3 insn+constant sequence into
2524 just 1 insn, and the constant moved to L3 to make:
2525
2526 mov.l L1,rn
2527 ..
2528 mov.l L3,rn
2529 bra L4
2530 nop
2531 align
2532 L3:.long value
2533 L4:.long value
2534
2535 Then the second move becomes the target for the shortening process. */
2536
2537 typedef struct
2538 {
2539 rtx value; /* Value in table. */
2540 rtx label; /* Label of value. */
2541 rtx wend; /* End of window. */
2542 enum machine_mode mode; /* Mode of value. */
2543
2544 /* True if this constant is accessed as part of a post-increment
2545 sequence. Note that HImode constants are never accessed in this way. */
2546 bool part_of_sequence_p;
2547 } pool_node;
2548
2549 /* The maximum number of constants that can fit into one pool, since
2550 the pc relative range is 0...1020 bytes and constants are at least 4
2551 bytes long. */
2552
2553 #define MAX_POOL_SIZE (1020/4)
2554 static pool_node pool_vector[MAX_POOL_SIZE];
2555 static int pool_size;
2556 static rtx pool_window_label;
2557 static int pool_window_last;
2558
2559 /* ??? If we need a constant in HImode which is the truncated value of a
2560 constant we need in SImode, we could combine the two entries thus saving
2561 two bytes. Is this common enough to be worth the effort of implementing
2562 it? */
2563
2564 /* ??? This stuff should be done at the same time that we shorten branches.
2565 As it is now, we must assume that all branches are the maximum size, and
2566 this causes us to almost always output constant pools sooner than
2567 necessary. */
2568
2569 /* Add a constant to the pool and return its label. */
2570
2571 static rtx
2572 add_constant (x, mode, last_value)
2573 rtx x;
2574 enum machine_mode mode;
2575 rtx last_value;
2576 {
2577 int i;
2578 rtx lab, new, ref, newref;
2579
2580 /* First see if we've already got it. */
2581 for (i = 0; i < pool_size; i++)
2582 {
2583 if (x->code == pool_vector[i].value->code
2584 && mode == pool_vector[i].mode)
2585 {
2586 if (x->code == CODE_LABEL)
2587 {
2588 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2589 continue;
2590 }
2591 if (rtx_equal_p (x, pool_vector[i].value))
2592 {
2593 lab = new = 0;
2594 if (! last_value
2595 || ! i
2596 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2597 {
2598 new = gen_label_rtx ();
2599 LABEL_REFS (new) = pool_vector[i].label;
2600 pool_vector[i].label = lab = new;
2601 }
2602 if (lab && pool_window_label)
2603 {
2604 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2605 ref = pool_vector[pool_window_last].wend;
2606 LABEL_NEXTREF (newref) = ref;
2607 pool_vector[pool_window_last].wend = newref;
2608 }
2609 if (new)
2610 pool_window_label = new;
2611 pool_window_last = i;
2612 return lab;
2613 }
2614 }
2615 }
2616
2617 /* Need a new one. */
2618 pool_vector[pool_size].value = x;
2619 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2620 {
2621 lab = 0;
2622 pool_vector[pool_size - 1].part_of_sequence_p = true;
2623 }
2624 else
2625 lab = gen_label_rtx ();
2626 pool_vector[pool_size].mode = mode;
2627 pool_vector[pool_size].label = lab;
2628 pool_vector[pool_size].wend = NULL_RTX;
2629 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2630 if (lab && pool_window_label)
2631 {
2632 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2633 ref = pool_vector[pool_window_last].wend;
2634 LABEL_NEXTREF (newref) = ref;
2635 pool_vector[pool_window_last].wend = newref;
2636 }
2637 if (lab)
2638 pool_window_label = lab;
2639 pool_window_last = pool_size;
2640 pool_size++;
2641 return lab;
2642 }
2643
2644 /* Output the literal table. */
2645
2646 static void
2647 dump_table (scan)
2648 rtx scan;
2649 {
2650 int i;
2651 int need_align = 1;
2652 rtx lab, ref;
2653 int have_df = 0;
2654
2655 /* Do two passes; on the first pass, dump out the HI sized constants. */
2656
2657 for (i = 0; i < pool_size; i++)
2658 {
2659 pool_node *p = &pool_vector[i];
2660
2661 if (p->mode == HImode)
2662 {
2663 if (need_align)
2664 {
2665 scan = emit_insn_after (gen_align_2 (), scan);
2666 need_align = 0;
2667 }
2668 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2669 scan = emit_label_after (lab, scan);
2670 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2671 scan);
2672 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2673 {
2674 lab = XEXP (ref, 0);
2675 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2676 }
2677 }
2678 else if (p->mode == DFmode)
2679 have_df = 1;
2680 }
2681
2682 need_align = 1;
2683
2684 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2685 {
2686 rtx align_insn = NULL_RTX;
2687
2688 scan = emit_label_after (gen_label_rtx (), scan);
2689 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2690 need_align = 0;
2691
2692 for (i = 0; i < pool_size; i++)
2693 {
2694 pool_node *p = &pool_vector[i];
2695
2696 switch (p->mode)
2697 {
2698 case HImode:
2699 break;
2700 case SImode:
2701 case SFmode:
2702 if (align_insn && !p->part_of_sequence_p)
2703 {
2704 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2705 emit_label_before (lab, align_insn);
2706 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2707 align_insn);
2708 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2709 {
2710 lab = XEXP (ref, 0);
2711 emit_insn_before (gen_consttable_window_end (lab),
2712 align_insn);
2713 }
2714 delete_insn (align_insn);
2715 align_insn = NULL_RTX;
2716 continue;
2717 }
2718 else
2719 {
2720 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2721 scan = emit_label_after (lab, scan);
2722 scan = emit_insn_after (gen_consttable_4 (p->value,
2723 const0_rtx), scan);
2724 need_align = ! need_align;
2725 }
2726 break;
2727 case DFmode:
2728 if (need_align)
2729 {
2730 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2731 align_insn = scan;
2732 need_align = 0;
2733 }
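/* Fall through.  */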
2734 case DImode:
2735 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2736 scan = emit_label_after (lab, scan);
2737 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2738 scan);
2739 break;
2740 default:
2741 abort ();
2742 break;
2743 }
2744
2745 if (p->mode != HImode)
2746 {
2747 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2748 {
2749 lab = XEXP (ref, 0);
2750 scan = emit_insn_after (gen_consttable_window_end (lab),
2751 scan);
2752 }
2753 }
2754 }
2755
2756 pool_size = 0;
2757 }
2758
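/* Second pass: output the remaining SImode/SFmode and DImode/DFmode
constants. If the aligned-double block above ran, it already emitted
everything and reset pool_size, so this loop does nothing.  */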
2759 for (i = 0; i < pool_size; i++)
2760 {
2761 pool_node *p = &pool_vector[i];
2762
2763 switch (p->mode)
2764 {
2765 case HImode:
2766 break;
2767 case SImode:
2768 case SFmode:
2769 if (need_align)
2770 {
2771 need_align = 0;
2772 scan = emit_label_after (gen_label_rtx (), scan);
2773 scan = emit_insn_after (gen_align_4 (), scan);
2774 }
2775 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2776 scan = emit_label_after (lab, scan);
2777 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2778 scan);
2779 break;
2780 case DFmode:
2781 case DImode:
2782 if (need_align)
2783 {
2784 need_align = 0;
2785 scan = emit_label_after (gen_label_rtx (), scan);
2786 scan = emit_insn_after (gen_align_4 (), scan);
2787 }
2788 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2789 scan = emit_label_after (lab, scan);
2790 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2791 scan);
2792 break;
2793 default:
2794 abort ();
2795 break;
2796 }
2797
2798 if (p->mode != HImode)
2799 {
2800 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2801 {
2802 lab = XEXP (ref, 0);
2803 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2804 }
2805 }
2806 }
2807
2808 scan = emit_insn_after (gen_consttable_end (), scan);
2809 scan = emit_barrier_after (scan);
2810 pool_size = 0;
2811 pool_window_label = NULL_RTX;
2812 pool_window_last = 0;
2813 }
2814
2815 /* Return nonzero if constant would be an ok source for a
2816 mov.w instead of a mov.l. */
2817
2818 static int
2819 hi_const (src)
2820 rtx src;
2821 {
2822 return (GET_CODE (src) == CONST_INT
2823 && INTVAL (src) >= -32768
2824 && INTVAL (src) <= 32767);
2825 }
2826
2827 /* Nonzero if the insn is a move instruction which needs to be fixed. */
2828
2829 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
2830 CONST_DOUBLE input value is CONST_OK_FOR_I. For an SFmode move, we don't
2831 need to fix it if the input value is CONST_OK_FOR_I. */
2832
2833 static int
2834 broken_move (insn)
2835 rtx insn;
2836 {
2837 if (GET_CODE (insn) == INSN)
2838 {
2839 rtx pat = PATTERN (insn);
2840 if (GET_CODE (pat) == PARALLEL)
2841 pat = XVECEXP (pat, 0, 0);
2842 if (GET_CODE (pat) == SET
2843 /* We can load any 8 bit value if we don't care what the high
2844 order bits end up as. */
2845 && GET_MODE (SET_DEST (pat)) != QImode
2846 && (CONSTANT_P (SET_SRC (pat))
2847 /* Match mova_const. */
2848 || (GET_CODE (SET_SRC (pat)) == UNSPEC
2849 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
2850 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
2851 && ! (TARGET_SH2E
2852 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
2853 && (fp_zero_operand (SET_SRC (pat))
2854 || fp_one_operand (SET_SRC (pat)))
2855 /* ??? If this is a -m4 or -m4-single compilation, in general
2856 we don't know the current setting of fpscr, so disable fldi.
2857 There is an exception if this was a register-register move
2858 before reload - and hence it was ascertained that we have
2859 single precision setting - and in a post-reload optimization
2860 we changed this to do a constant load. In that case
2861 we don't have an r0 clobber, hence we must use fldi. */
2862 && (! TARGET_SH4 || TARGET_FMOVD
2863 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
2864 == SCRATCH))
2865 && GET_CODE (SET_DEST (pat)) == REG
2866 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
2867 && (GET_CODE (SET_SRC (pat)) != CONST_INT
2868 || ! CONST_OK_FOR_I (INTVAL (SET_SRC (pat)))))
2869 return 1;
2870 }
2871
2872 return 0;
2873 }
2874
2875 static int
2876 mova_p (insn)
2877 rtx insn;
2878 {
2879 return (GET_CODE (insn) == INSN
2880 && GET_CODE (PATTERN (insn)) == SET
2881 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
2882 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
2883 /* Don't match mova_const. */
2884 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
2885 }
2886
2887 /* Find the last barrier from insn FROM which is close enough to hold the
2888 constant pool. If we can't find one, then create one near the end of
2889 the range. */
2890
2891 static rtx
2892 find_barrier (num_mova, mova, from)
2893 int num_mova;
2894 rtx mova, from;
2895 {
2896 int count_si = 0;
2897 int count_hi = 0;
2898 int found_hi = 0;
2899 int found_si = 0;
2900 int found_di = 0;
2901 int hi_align = 2;
2902 int si_align = 2;
2903 int leading_mova = num_mova;
2904 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
2905 int si_limit;
2906 int hi_limit;
2907
2908 /* For HImode: range is 510, add 4 because pc counts from address of
2909 second instruction after this one, subtract 2 for the jump instruction
2910 that we may need to emit before the table, subtract 2 for the instruction
2911 that fills the jump delay slot (in very rare cases, reorg will take an
2912 instruction from after the constant pool or will leave the delay slot
2913 empty). This gives 510.
2914 For SImode: range is 1020, add 4 because pc counts from address of
2915 second instruction after this one, subtract 2 in case pc is 2 byte
2916 aligned, subtract 2 for the jump instruction that we may need to emit
2917 before the table, subtract 2 for the instruction that fills the jump
2918 delay slot. This gives 1018. */
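/* That is, hi_limit = 510 + 4 - 2 - 2 = 510 and
si_limit = 1020 + 4 - 2 - 2 - 2 = 1018, as assigned below.  */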
2919
2920 /* The branch will always be shortened now that the reference address for
2921 forward branches is the successor address, so we no longer need to make
2922 adjustments to the [sh]i_limit for -O0. */
2923
2924 si_limit = 1018;
2925 hi_limit = 510;
2926
2927 while (from && count_si < si_limit && count_hi < hi_limit)
2928 {
2929 int inc = get_attr_length (from);
2930 int new_align = 1;
2931
2932 if (GET_CODE (from) == CODE_LABEL)
2933 {
2934 if (optimize)
2935 new_align = 1 << label_to_alignment (from);
2936 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
2937 new_align = 1 << barrier_align (from);
2938 else
2939 new_align = 1;
2940 inc = 0;
2941 }
2942
2943 if (GET_CODE (from) == BARRIER)
2944 {
2945
2946 found_barrier = from;
2947
2948 /* If we are at the end of the function, or in front of an alignment
2949 instruction, we need not insert an extra alignment. We prefer
2950 this kind of barrier. */
2951 if (barrier_align (from) > 2)
2952 good_barrier = from;
2953 }
2954
2955 if (broken_move (from))
2956 {
2957 rtx pat, src, dst;
2958 enum machine_mode mode;
2959
2960 pat = PATTERN (from);
2961 if (GET_CODE (pat) == PARALLEL)
2962 pat = XVECEXP (pat, 0, 0);
2963 src = SET_SRC (pat);
2964 dst = SET_DEST (pat);
2965 mode = GET_MODE (dst);
2966
2967 /* We must explicitly check the mode, because sometimes the
2968 front end will generate code to load unsigned constants into
2969 HImode targets without properly sign extending them. */
2970 if (mode == HImode
2971 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
2972 {
2973 found_hi += 2;
2974 /* We put the short constants before the long constants, so
2975 we must count the length of short constants in the range
2976 for the long constants. */
2977 /* ??? This isn't optimal, but is easy to do. */
2978 si_limit -= 2;
2979 }
2980 else
2981 {
2982 /* We dump DF/DI constants before SF/SI ones, because
2983 the limit is the same, but the alignment requirements
2984 are higher. We may waste up to 4 additional bytes
2985 for alignment, and the DF/DI constant may have
2986 another SF/SI constant placed before it. */
2987 if (TARGET_SHCOMPACT
2988 && ! found_di
2989 && (mode == DFmode || mode == DImode))
2990 {
2991 found_di = 1;
2992 si_limit -= 8;
2993 }
2994 while (si_align > 2 && found_si + si_align - 2 > count_si)
2995 si_align >>= 1;
2996 if (found_si > count_si)
2997 count_si = found_si;
2998 found_si += GET_MODE_SIZE (mode);
2999 if (num_mova)
3000 si_limit -= GET_MODE_SIZE (mode);
3001 }
3002
3003 /* See the code in machine_dependent_reorg, which has a similar if
3004 statement that generates a new mova insn in many cases. */
3005 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3006 inc += 2;
3007 }
3008
3009 if (mova_p (from))
3010 {
3011 if (! num_mova++)
3012 {
3013 leading_mova = 0;
3014 mova = from;
3015 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3016 }
3017 if (found_si > count_si)
3018 count_si = found_si;
3019 }
3020 else if (GET_CODE (from) == JUMP_INSN
3021 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3022 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3023 {
3024 if (num_mova)
3025 num_mova--;
3026 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3027 {
3028 /* We have just passed the barrier in front of the
3029 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3030 the ADDR_DIFF_VEC is accessed as data, just like our pool
3031 constants, this is a good opportunity to accommodate what
3032 we have gathered so far.
3033 If we waited any longer, we could end up at a barrier in
3034 front of code, which gives worse cache usage for separated
3035 instruction / data caches. */
3036 good_barrier = found_barrier;
3037 break;
3038 }
3039 else
3040 {
3041 rtx body = PATTERN (from);
3042 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3043 }
3044 }
3045 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3046 else if (GET_CODE (from) == JUMP_INSN
3047 && ! TARGET_SH2
3048 && ! TARGET_SMALLCODE)
3049 new_align = 4;
3050
3051 if (found_si)
3052 {
3053 count_si += inc;
3054 if (new_align > si_align)
3055 {
3056 si_limit -= (count_si - 1) & (new_align - si_align);
3057 si_align = new_align;
3058 }
3059 count_si = (count_si + new_align - 1) & -new_align;
3060 }
3061 if (found_hi)
3062 {
3063 count_hi += inc;
3064 if (new_align > hi_align)
3065 {
3066 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3067 hi_align = new_align;
3068 }
3069 count_hi = (count_hi + new_align - 1) & -new_align;
3070 }
3071 from = NEXT_INSN (from);
3072 }
3073
3074 if (num_mova)
3075 {
3076 if (leading_mova)
3077 {
3078 /* Try as we might, the leading mova is out of range. Change
3079 it into a load (which will become a pcload) and retry. */
3080 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3081 INSN_CODE (mova) = -1;
3082 return find_barrier (0, 0, mova);
3083 }
3084 else
3085 {
3086 /* Insert the constant pool table before the mova instruction,
3087 to prevent the mova label reference from going out of range. */
3088 from = mova;
3089 good_barrier = found_barrier = barrier_before_mova;
3090 }
3091 }
3092
3093 if (found_barrier)
3094 {
3095 if (good_barrier && next_real_insn (found_barrier))
3096 found_barrier = good_barrier;
3097 }
3098 else
3099 {
3100 /* We didn't find a barrier in time to dump our stuff,
3101 so we'll make one. */
3102 rtx label = gen_label_rtx ();
3103
3104 /* If we exceeded the range, then we must back up over the last
3105 instruction we looked at. Otherwise, we just need to undo the
3106 NEXT_INSN at the end of the loop. */
3107 if (count_hi > hi_limit || count_si > si_limit)
3108 from = PREV_INSN (PREV_INSN (from));
3109 else
3110 from = PREV_INSN (from);
3111
3112 /* Walk back to be just before any jump or label.
3113 Putting it before a label reduces the number of times the branch
3114 around the constant pool table will be hit. Putting it before
3115 a jump makes it more likely that the bra delay slot will be
3116 filled. */
3117 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3118 || GET_CODE (from) == CODE_LABEL)
3119 from = PREV_INSN (from);
3120
3121 from = emit_jump_insn_after (gen_jump (label), from);
3122 JUMP_LABEL (from) = label;
3123 LABEL_NUSES (label) = 1;
3124 found_barrier = emit_barrier_after (from);
3125 emit_label_after (label, found_barrier);
3126 }
3127
3128 return found_barrier;
3129 }
3130
3131 /* If the instruction INSN is implemented by a special function, and we can
3132 positively find the register that is used to call the sfunc, and this
3133 register is not used anywhere else in this instruction - except as the
3134 destination of a set, return this register; else, return 0. */
3135 rtx
3136 sfunc_uses_reg (insn)
3137 rtx insn;
3138 {
3139 int i;
3140 rtx pattern, part, reg_part, reg;
3141
3142 if (GET_CODE (insn) != INSN)
3143 return 0;
3144 pattern = PATTERN (insn);
3145 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3146 return 0;
3147
3148 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3149 {
3150 part = XVECEXP (pattern, 0, i);
3151 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3152 reg_part = part;
3153 }
3154 if (! reg_part)
3155 return 0;
3156 reg = XEXP (reg_part, 0);
3157 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3158 {
3159 part = XVECEXP (pattern, 0, i);
3160 if (part == reg_part || GET_CODE (part) == CLOBBER)
3161 continue;
3162 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3163 && GET_CODE (SET_DEST (part)) == REG)
3164 ? SET_SRC (part) : part)))
3165 return 0;
3166 }
3167 return reg;
3168 }
3169
3170 /* See if the only way in which INSN uses REG is by calling it, or by
3171 setting it while calling it. Set *SET to a SET rtx if the register
3172 is set by INSN. */
3173
3174 static int
3175 noncall_uses_reg (reg, insn, set)
3176 rtx reg;
3177 rtx insn;
3178 rtx *set;
3179 {
3180 rtx pattern, reg2;
3181
3182 *set = NULL_RTX;
3183
3184 reg2 = sfunc_uses_reg (insn);
3185 if (reg2 && REGNO (reg2) == REGNO (reg))
3186 {
3187 pattern = single_set (insn);
3188 if (pattern
3189 && GET_CODE (SET_DEST (pattern)) == REG
3190 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3191 *set = pattern;
3192 return 0;
3193 }
3194 if (GET_CODE (insn) != CALL_INSN)
3195 {
3196 /* We don't use rtx_equal_p because we don't care if the mode is
3197 different. */
3198 pattern = single_set (insn);
3199 if (pattern
3200 && GET_CODE (SET_DEST (pattern)) == REG
3201 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3202 {
3203 rtx par, part;
3204 int i;
3205
3206 *set = pattern;
3207 par = PATTERN (insn);
3208 if (GET_CODE (par) == PARALLEL)
3209 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3210 {
3211 part = XVECEXP (par, 0, i);
3212 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3213 return 1;
3214 }
3215 return reg_mentioned_p (reg, SET_SRC (pattern));
3216 }
3217
3218 return 1;
3219 }
3220
3221 pattern = PATTERN (insn);
3222
3223 if (GET_CODE (pattern) == PARALLEL)
3224 {
3225 int i;
3226
3227 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3228 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3229 return 1;
3230 pattern = XVECEXP (pattern, 0, 0);
3231 }
3232
3233 if (GET_CODE (pattern) == SET)
3234 {
3235 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3236 {
3237 /* We don't use rtx_equal_p, because we don't care if the
3238 mode is different. */
3239 if (GET_CODE (SET_DEST (pattern)) != REG
3240 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3241 return 1;
3242
3243 *set = pattern;
3244 }
3245
3246 pattern = SET_SRC (pattern);
3247 }
3248
3249 if (GET_CODE (pattern) != CALL
3250 || GET_CODE (XEXP (pattern, 0)) != MEM
3251 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3252 return 1;
3253
3254 return 0;
3255 }
3256
3257 /* Given X, a pattern of an insn or a part of it, return a mask of used
3258 general registers. Bits 0..15 mean that the respective registers
3259 are used as inputs in the instruction. Bits 16..31 mean that the
3260 registers 0..15, respectively, are used as outputs, or are clobbered.
3261 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
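/* Illustrative example: for (set (reg:SI 1) (plus:SI (reg:SI 4) (reg:SI 5))),
this returns (1 << 4) | (1 << 5) for the inputs plus (1 << (1 + 16)) for
the output, i.e. 0x20030.  */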
3262 int
3263 regs_used (x, is_dest)
3264 rtx x; int is_dest;
3265 {
3266 enum rtx_code code;
3267 const char *fmt;
3268 int i, used = 0;
3269
3270 if (! x)
3271 return used;
3272 code = GET_CODE (x);
3273 switch (code)
3274 {
3275 case REG:
3276 if (REGNO (x) < 16)
3277 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3278 << (REGNO (x) + is_dest));
3279 return 0;
3280 case SUBREG:
3281 {
3282 rtx y = SUBREG_REG (x);
3283
3284 if (GET_CODE (y) != REG)
3285 break;
3286 if (REGNO (y) < 16)
3287 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3288 << (REGNO (y) +
3289 subreg_regno_offset (REGNO (y),
3290 GET_MODE (y),
3291 SUBREG_BYTE (x),
3292 GET_MODE (x)) + is_dest));
3293 return 0;
3294 }
3295 case SET:
3296 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3297 case RETURN:
3298 /* If there was a return value, it must have been indicated with USE. */
3299 return 0x00ffff00;
3300 case CLOBBER:
3301 is_dest = 1;
3302 break;
3303 case MEM:
3304 is_dest = 0;
3305 break;
3306 case CALL:
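/* Assume a call reads the argument registers r4..r7 (bits 4..7) and
clobbers r0..r7 (bits 16..23).  */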
3307 used |= 0x00ff00f0;
3308 break;
3309 default:
3310 break;
3311 }
3312
3313 fmt = GET_RTX_FORMAT (code);
3314
3315 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3316 {
3317 if (fmt[i] == 'E')
3318 {
3319 register int j;
3320 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3321 used |= regs_used (XVECEXP (x, i, j), is_dest);
3322 }
3323 else if (fmt[i] == 'e')
3324 used |= regs_used (XEXP (x, i), is_dest);
3325 }
3326 return used;
3327 }
3328
3329 /* Create an instruction that prevents redirection of a conditional branch
3330 to the destination of the JUMP with address ADDR.
3331 If the branch needs to be implemented as an indirect jump, try to find
3332 a scratch register for it.
3333 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3334 Pass 1 if any preceding insn that doesn't fit into a delay slot is good
3335 enough; pass 2 if a definite blocking insn is needed.
3336 -1 is used internally to avoid deep recursion.
3337 If a blocking instruction is made or recognized, return it. */
3338
3339 static rtx
3340 gen_block_redirect (jump, addr, need_block)
3341 rtx jump;
3342 int addr, need_block;
3343 {
3344 int dead = 0;
3345 rtx prev = prev_nonnote_insn (jump);
3346 rtx dest;
3347
3348 /* First, check if we already have an instruction that satisfies our need. */
3349 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3350 {
3351 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3352 return prev;
3353 if (GET_CODE (PATTERN (prev)) == USE
3354 || GET_CODE (PATTERN (prev)) == CLOBBER
3355 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3356 prev = jump;
3357 else if ((need_block &= ~1) < 0)
3358 return prev;
3359 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3360 need_block = 0;
3361 }
3362 /* We can't use JUMP_LABEL here because it might be undefined
3363 when not optimizing. */
3364 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3365 /* If the branch is out of range, try to find a scratch register for it. */
3366 if (optimize
3367 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3368 > 4092 + 4098))
3369 {
3370 rtx scan;
3371 /* Don't look for the stack pointer as a scratch register;
3372 it would cause trouble if an interrupt occurred. */
3373 unsigned try = 0x7fff, used;
3374 int jump_left = flag_expensive_optimizations + 1;
3375
3376 /* It is likely that the most recent eligible instruction is wanted for
3377 the delay slot. Therefore, find out which registers it uses, and
3378 try to avoid using them. */
3379
3380 for (scan = jump; (scan = PREV_INSN (scan)); )
3381 {
3382 enum rtx_code code;
3383
3384 if (INSN_DELETED_P (scan))
3385 continue;
3386 code = GET_CODE (scan);
3387 if (code == CODE_LABEL || code == JUMP_INSN)
3388 break;
3389 if (code == INSN
3390 && GET_CODE (PATTERN (scan)) != USE
3391 && GET_CODE (PATTERN (scan)) != CLOBBER
3392 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3393 {
3394 try &= ~regs_used (PATTERN (scan), 0);
3395 break;
3396 }
3397 }
3398 for (used = dead = 0, scan = JUMP_LABEL (jump);
3399 (scan = NEXT_INSN (scan)); )
3400 {
3401 enum rtx_code code;
3402
3403 if (INSN_DELETED_P (scan))
3404 continue;
3405 code = GET_CODE (scan);
3406 if (GET_RTX_CLASS (code) == 'i')
3407 {
3408 used |= regs_used (PATTERN (scan), 0);
3409 if (code == CALL_INSN)
3410 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3411 dead |= (used >> 16) & ~used;
3412 if (dead & try)
3413 {
3414 dead &= try;
3415 break;
3416 }
3417 if (code == JUMP_INSN)
3418 {
3419 if (jump_left-- && simplejump_p (scan))
3420 scan = JUMP_LABEL (scan);
3421 else
3422 break;
3423 }
3424 }
3425 }
3426 /* Mask out the stack pointer again, in case it was
3427 the only 'free' register we have found. */
3428 dead &= 0x7fff;
3429 }
3430 /* If the immediate destination is still in range, check for possible
3431 threading with a jump beyond the delay slot insn.
3432 Don't check if we are called recursively; the jump has been or will be
3433 checked in a different invocation in that case. */
3434
3435 else if (optimize && need_block >= 0)
3436 {
3437 rtx next = next_active_insn (next_active_insn (dest));
3438 if (next && GET_CODE (next) == JUMP_INSN
3439 && GET_CODE (PATTERN (next)) == SET
3440 && recog_memoized (next) == CODE_FOR_jump_compact)
3441 {
3442 dest = JUMP_LABEL (next);
3443 if (dest
3444 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3445 > 4092 + 4098))
3446 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3447 }
3448 }
3449
3450 if (dead)
3451 {
3452 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3453
3454 /* It would be nice if we could convert the jump into an indirect
3455 jump / far branch right now, thus exposing all constituent
3456 instructions to further optimization. However, reorg uses
3457 simplejump_p to determine if there is an unconditional jump where
3458 it should try to schedule instructions from the target of the
3459 branch; simplejump_p fails for indirect jumps even if they have
3460 a JUMP_LABEL. */
3461 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3462 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3463 , jump);
3464 /* ??? We would like this to have the scope of the jump, but that
3465 scope will change when a delay slot insn of an inner scope is added.
3466 Hence, after delay slot scheduling, we'll have to expect
3467 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3468 the jump. */
3469
3470 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3471 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3472 return insn;
3473 }
3474 else if (need_block)
3475 /* We can't use JUMP_LABEL here because it might be undefined
3476 when not optimizing. */
3477 return emit_insn_before (gen_block_branch_redirect
3478 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3479 , jump);
3480 return prev;
3481 }
3482
3483 #define CONDJUMP_MIN -252
3484 #define CONDJUMP_MAX 262
3485 struct far_branch
3486 {
3487 /* A label (to be placed) in front of the jump
3488 that jumps to our ultimate destination. */
3489 rtx near_label;
3490 /* Where we are going to insert it if we cannot move the jump any farther,
3491 or the jump itself if we have picked up an existing jump. */
3492 rtx insert_place;
3493 /* The ultimate destination. */
3494 rtx far_label;
3495 struct far_branch *prev;
3496 /* If the branch has already been created, its address;
3497 else the address of its first prospective user. */
3498 int address;
3499 };
3500
3501 static void gen_far_branch PARAMS ((struct far_branch *));
3502 enum mdep_reorg_phase_e mdep_reorg_phase;
3503 static void
3504 gen_far_branch (bp)
3505 struct far_branch *bp;
3506 {
3507 rtx insn = bp->insert_place;
3508 rtx jump;
3509 rtx label = gen_label_rtx ();
3510
3511 emit_label_after (label, insn);
3512 if (bp->far_label)
3513 {
3514 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3515 LABEL_NUSES (bp->far_label)++;
3516 }
3517 else
3518 jump = emit_jump_insn_after (gen_return (), insn);
3519 /* Emit a barrier so that reorg knows that any following instructions
3520 are not reachable via a fall-through path.
3521 But don't do this when not optimizing, since we wouldn't suppress the
3522 alignment for the barrier then, and could end up with out-of-range
3523 pc-relative loads. */
3524 if (optimize)
3525 emit_barrier_after (jump);
3526 emit_label_after (bp->near_label, insn);
3527 JUMP_LABEL (jump) = bp->far_label;
3528 if (! invert_jump (insn, label, 1))
3529 abort ();
3530 (emit_insn_after
3531 (gen_stuff_delay_slot
3532 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3533 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3534 insn));
3535 /* Prevent reorg from undoing our splits. */
3536 gen_block_redirect (jump, bp->address += 2, 2);
3537 }
3538
3539 /* Fix up ADDR_DIFF_VECs. */
3540 void
3541 fixup_addr_diff_vecs (first)
3542 rtx first;
3543 {
3544 rtx insn;
3545
3546 for (insn = first; insn; insn = NEXT_INSN (insn))
3547 {
3548 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3549
3550 if (GET_CODE (insn) != JUMP_INSN
3551 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3552 continue;
3553 pat = PATTERN (insn);
3554 vec_lab = XEXP (XEXP (pat, 0), 0);
3555
3556 /* Search the matching casesi_jump_2. */
3557 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3558 {
3559 if (GET_CODE (prev) != JUMP_INSN)
3560 continue;
3561 prevpat = PATTERN (prev);
3562 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3563 continue;
3564 x = XVECEXP (prevpat, 0, 1);
3565 if (GET_CODE (x) != USE)
3566 continue;
3567 x = XEXP (x, 0);
3568 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3569 break;
3570 }
3571
3572 /* Emit the reference label of the braf where it belongs, right after
3573 the casesi_jump_2 (i.e. braf). */
3574 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3575 emit_label_after (braf_label, prev);
3576
3577 /* Fix up the ADDR_DIFF_VEC to be relative
3578 to the reference address of the braf. */
3579 XEXP (XEXP (pat, 0), 0) = braf_label;
3580 }
3581 }
3582
3583 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3584 a barrier. Return the base 2 logarithm of the desired alignment. */
3585 int
3586 barrier_align (barrier_or_label)
3587 rtx barrier_or_label;
3588 {
3589 rtx next = next_real_insn (barrier_or_label), pat, prev;
3590 int slot, credit, jump_to_next = 0;
3591
3592 if (! next)
3593 return 0;
3594
3595 pat = PATTERN (next);
3596
3597 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3598 return 2;
3599
3600 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3601 /* This is a barrier in front of a constant table. */
3602 return 0;
3603
3604 prev = prev_real_insn (barrier_or_label);
3605 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3606 {
3607 pat = PATTERN (prev);
3608 /* If this is a very small table, we want to keep the alignment after
3609 the table to the minimum for proper code alignment. */
3610 return ((TARGET_SMALLCODE
3611 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3612 <= (unsigned)1 << (CACHE_LOG - 2)))
3613 ? 1 << TARGET_SHMEDIA : align_jumps_log);
3614 }
3615
3616 if (TARGET_SMALLCODE)
3617 return 0;
3618
3619 if (! TARGET_SH2 || ! optimize)
3620 return align_jumps_log;
3621
3622 /* When fixing up pcloads, a constant table might be inserted just before
3623 the basic block that ends with the barrier. Thus, we can't trust the
3624 instruction lengths before that. */
3625 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3626 {
3627 /* Check if there is an immediately preceding branch to the insn beyond
3628 the barrier. We must weigh the cost of discarding useful information
3629 from the current cache line when executing this branch and there is
3630 an alignment, against that of fetching unneeded insns in front of the
3631 branch target when there is no alignment. */
3632
3633 /* There are two delay_slot cases to consider. One is the simple case
3634 where the preceding branch is to the insn beyond the barrier (simple
3635 delay slot filling), and the other is where the preceding branch has
3636 a delay slot that is a duplicate of the insn after the barrier
3637 (fill_eager_delay_slots) and the branch is to the insn after the insn
3638 after the barrier. */
3639
3640 /* PREV is presumed to be the JUMP_INSN for the barrier under
3641 investigation. Skip to the insn before it. */
3642 prev = prev_real_insn (prev);
3643
3644 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3645 credit >= 0 && prev && GET_CODE (prev) == INSN;
3646 prev = prev_real_insn (prev))
3647 {
3648 jump_to_next = 0;
3649 if (GET_CODE (PATTERN (prev)) == USE
3650 || GET_CODE (PATTERN (prev)) == CLOBBER)
3651 continue;
3652 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3653 {
3654 prev = XVECEXP (PATTERN (prev), 0, 1);
3655 if (INSN_UID (prev) == INSN_UID (next))
3656 {
3657 /* Delay slot was filled with insn at jump target. */
3658 jump_to_next = 1;
3659 continue;
3660 }
3661 }
3662
3663 if (slot &&
3664 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3665 slot = 0;
3666 credit -= get_attr_length (prev);
3667 }
3668 if (prev
3669 && GET_CODE (prev) == JUMP_INSN
3670 && JUMP_LABEL (prev))
3671 {
3672 rtx x;
3673 if (jump_to_next
3674 || next_real_insn (JUMP_LABEL (prev)) == next
3675 /* If relax_delay_slots() decides NEXT was redundant
3676 with some previous instruction, it will have
3677 redirected PREV's jump to the following insn. */
3678 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3679 /* There is no upper bound on redundant instructions
3680 that might have been skipped, but we must not put an
3681 alignment where none had been before. */
3682 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3683 (INSN_P (x)
3684 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3685 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
3686 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
3687 {
3688 rtx pat = PATTERN (prev);
3689 if (GET_CODE (pat) == PARALLEL)
3690 pat = XVECEXP (pat, 0, 0);
3691 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3692 return 0;
3693 }
3694 }
3695 }
3696
3697 return align_jumps_log;
3698 }
3699
3700 /* If we are inside a phony loop, almost any kind of label can turn up as the
3701 first one in the loop. Aligning a braf label causes incorrect switch
3702 destination addresses; we can detect braf labels because they are
3703 followed by a BARRIER.
3704 Applying loop alignment to small constant or switch tables is a waste
3705 of space, so we suppress this too. */
3706 int
3707 sh_loop_align (label)
3708 rtx label;
3709 {
3710 rtx next = label;
3711
3712 do
3713 next = next_nonnote_insn (next);
3714 while (next && GET_CODE (next) == CODE_LABEL);
3715
3716 if (! next
3717 || ! INSN_P (next)
3718 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3719 || recog_memoized (next) == CODE_FOR_consttable_2)
3720 return 0;
3721
3722 return align_loops_log;
3723 }
3724
3725 /* Do a final pass over the function, just before delayed branch
3726 scheduling. */
3727
3728 static void
3729 sh_reorg ()
3730 {
3731 rtx first, insn, mova = NULL_RTX;
3732 int num_mova;
3733 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3734 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3735
3736 first = get_insns ();
3737
3738 /* We must split call insns before introducing `mova's. If we're
3739 optimizing, they'll have already been split. Otherwise, make
3740 sure we don't split them too late. */
3741 if (! optimize)
3742 split_all_insns_noflow ();
3743
3744 if (TARGET_SHMEDIA)
3745 return;
3746
3747 /* If relaxing, generate pseudo-ops to associate function calls with
3748 the symbols they call. It does no harm to not generate these
3749 pseudo-ops. However, when we can generate them, it enables the
3750 linker to potentially relax the jsr to a bsr, and eliminate the
3751 register load and, possibly, the constant pool entry. */
3752
3753 mdep_reorg_phase = SH_INSERT_USES_LABELS;
3754 if (TARGET_RELAX)
3755 {
3756 /* Remove all REG_LABEL notes. We want to use them for our own
3757 purposes. This works because none of the remaining passes
3758 need to look at them.
3759
3760 ??? But it may break in the future. We should use a machine
3761 dependent REG_NOTE, or some other approach entirely. */
3762 for (insn = first; insn; insn = NEXT_INSN (insn))
3763 {
3764 if (INSN_P (insn))
3765 {
3766 rtx note;
3767
3768 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
3769 remove_note (insn, note);
3770 }
3771 }
3772
3773 for (insn = first; insn; insn = NEXT_INSN (insn))
3774 {
3775 rtx pattern, reg, link, set, scan, dies, label;
3776 int rescan = 0, foundinsn = 0;
3777
3778 if (GET_CODE (insn) == CALL_INSN)
3779 {
3780 pattern = PATTERN (insn);
3781
3782 if (GET_CODE (pattern) == PARALLEL)
3783 pattern = XVECEXP (pattern, 0, 0);
3784 if (GET_CODE (pattern) == SET)
3785 pattern = SET_SRC (pattern);
3786
3787 if (GET_CODE (pattern) != CALL
3788 || GET_CODE (XEXP (pattern, 0)) != MEM)
3789 continue;
3790
3791 reg = XEXP (XEXP (pattern, 0), 0);
3792 }
3793 else
3794 {
3795 reg = sfunc_uses_reg (insn);
3796 if (! reg)
3797 continue;
3798 }
3799
3800 if (GET_CODE (reg) != REG)
3801 continue;
3802
3803 /* This is a function call via REG. If the only uses of REG
3804 between the time that it is set and the time that it dies
3805 are in function calls, then we can associate all the
3806 function calls with the setting of REG. */
3807
3808 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
3809 {
3810 if (REG_NOTE_KIND (link) != 0)
3811 continue;
3812 set = single_set (XEXP (link, 0));
3813 if (set && rtx_equal_p (reg, SET_DEST (set)))
3814 {
3815 link = XEXP (link, 0);
3816 break;
3817 }
3818 }
3819
3820 if (! link)
3821 {
3822 /* ??? Sometimes global register allocation will have
3823 deleted the insn pointed to by LOG_LINKS. Try
3824 scanning backward to find where the register is set. */
3825 for (scan = PREV_INSN (insn);
3826 scan && GET_CODE (scan) != CODE_LABEL;
3827 scan = PREV_INSN (scan))
3828 {
3829 if (! INSN_P (scan))
3830 continue;
3831
3832 if (! reg_mentioned_p (reg, scan))
3833 continue;
3834
3835 if (noncall_uses_reg (reg, scan, &set))
3836 break;
3837
3838 if (set)
3839 {
3840 link = scan;
3841 break;
3842 }
3843 }
3844 }
3845
3846 if (! link)
3847 continue;
3848
3849 /* The register is set at LINK. */
3850
3851 /* We can only optimize the function call if the register is
3852 being set to a symbol. In theory, we could sometimes
3853 optimize calls to a constant location, but the assembler
3854 and linker do not support that at present. */
3855 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
3856 && GET_CODE (SET_SRC (set)) != LABEL_REF)
3857 continue;
3858
3859 /* Scan forward from LINK to the place where REG dies, and
3860 make sure that the only insns which use REG are
3861 themselves function calls. */
3862
3863 /* ??? This doesn't work for call targets that were allocated
3864 by reload, since there may not be a REG_DEAD note for the
3865 register. */
3866
3867 dies = NULL_RTX;
3868 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
3869 {
3870 rtx scanset;
3871
3872 /* Don't try to trace forward past a CODE_LABEL if we haven't
3873 seen INSN yet. Ordinarily, we will only find the setting insn
3874 in LOG_LINKS if it is in the same basic block. However,
3875 cross-jumping can insert code labels in between the load and
3876 the call, and can result in situations where a single call
3877 insn may have two targets depending on where we came from. */
3878
3879 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
3880 break;
3881
3882 if (! INSN_P (scan))
3883 continue;
3884
3885 /* Don't try to trace forward past a JUMP. To optimize
3886 safely, we would have to check that all the
3887 instructions at the jump destination did not use REG. */
3888
3889 if (GET_CODE (scan) == JUMP_INSN)
3890 break;
3891
3892 if (! reg_mentioned_p (reg, scan))
3893 continue;
3894
3895 if (noncall_uses_reg (reg, scan, &scanset))
3896 break;
3897
3898 if (scan == insn)
3899 foundinsn = 1;
3900
3901 if (scan != insn
3902 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
3903 {
3904 /* There is a function call to this register other
3905 than the one we are checking. If we optimize
3906 this call, we need to rescan again below. */
3907 rescan = 1;
3908 }
3909
3910 /* ??? We shouldn't have to worry about SCANSET here.
3911 We should just be able to check for a REG_DEAD note
3912 on a function call. However, the REG_DEAD notes are
3913 apparently not dependable around libcalls; c-torture
3914 execute/920501-2 is a test case. If SCANSET is set,
3915 then this insn sets the register, so it must have
3916 died earlier. Unfortunately, this will only handle
3917 the cases in which the register is, in fact, set in a
3918 later insn. */
3919
3920 /* ??? We shouldn't have to use FOUNDINSN here.
3921 However, the LOG_LINKS fields are apparently not
3922 entirely reliable around libcalls;
3923 newlib/libm/math/e_pow.c is a test case. Sometimes
3924 an insn will appear in LOG_LINKS even though it is
3925 not the most recent insn which sets the register. */
3926
3927 if (foundinsn
3928 && (scanset
3929 || find_reg_note (scan, REG_DEAD, reg)))
3930 {
3931 dies = scan;
3932 break;
3933 }
3934 }
3935
3936 if (! dies)
3937 {
3938 /* Either there was a branch, or some insn used REG
3939 other than as a function call address. */
3940 continue;
3941 }
3942
3943 /* Create a code label, and put it in a REG_LABEL note on
3944 the insn which sets the register, and on each call insn
3945 which uses the register. In final_prescan_insn we look
3946 for the REG_LABEL notes, and output the appropriate label
3947 or pseudo-op. */
3948
3949 label = gen_label_rtx ();
3950 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
3951 REG_NOTES (link));
3952 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
3953 REG_NOTES (insn));
3954 if (rescan)
3955 {
3956 scan = link;
3957 do
3958 {
3959 rtx reg2;
3960
3961 scan = NEXT_INSN (scan);
3962 if (scan != insn
3963 && ((GET_CODE (scan) == CALL_INSN
3964 && reg_mentioned_p (reg, scan))
3965 || ((reg2 = sfunc_uses_reg (scan))
3966 && REGNO (reg2) == REGNO (reg))))
3967 REG_NOTES (scan)
3968 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
3969 }
3970 while (scan != dies);
3971 }
3972 }
3973 }
3974
3975 if (TARGET_SH2)
3976 fixup_addr_diff_vecs (first);
3977
3978 if (optimize)
3979 {
3980 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
3981 shorten_branches (first);
3982 }
3983 /* Scan the function looking for move instructions which have to be
3984 changed to pc-relative loads and insert the literal tables. */
3985
3986 mdep_reorg_phase = SH_FIXUP_PCLOAD;
3987 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
3988 {
3989 if (mova_p (insn))
3990 {
3991 if (! num_mova++)
3992 mova = insn;
3993 }
3994 else if (GET_CODE (insn) == JUMP_INSN
3995 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
3996 && num_mova)
3997 {
3998 rtx scan;
3999 int total;
4000
4001 num_mova--;
4002
4003 /* Some code might have been inserted between the mova and
4004 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4005 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4006 total += get_attr_length (scan);
4007
4008 /* The range of mova is 1020; add 4 because the pc counts from the
4009 address of the second insn after this one, and subtract 2 in
4010 case the pc is 2-byte aligned. Alignment needed for the ADDR_DIFF_VEC
4011 cancels out with the alignment effects of the mova itself. */
4012 if (total > 1022)
4013 {
4014 /* Change the mova into a load, and restart scanning
4015 there. broken_move will then return true for mova. */
4016 SET_SRC (PATTERN (mova))
4017 = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4018 INSN_CODE (mova) = -1;
4019 insn = mova;
4020 }
4021 }
4022 if (broken_move (insn))
4023 {
4024 rtx scan;
4025 /* Scan ahead looking for a barrier to stick the constant table
4026 behind. */
4027 rtx barrier = find_barrier (num_mova, mova, insn);
4028 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4029
4030 if (num_mova && ! mova_p (mova))
4031 {
4032 /* find_barrier had to change the first mova into a
4033 pcload; thus, we have to start with this new pcload. */
4034 insn = mova;
4035 num_mova = 0;
4036 }
4037 /* Now find all the moves between the points and modify them. */
4038 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4039 {
4040 if (GET_CODE (scan) == CODE_LABEL)
4041 last_float = 0;
4042 if (broken_move (scan))
4043 {
4044 rtx *patp = &PATTERN (scan), pat = *patp;
4045 rtx src, dst;
4046 rtx lab;
4047 rtx newsrc;
4048 enum machine_mode mode;
4049
4050 if (GET_CODE (pat) == PARALLEL)
4051 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4052 src = SET_SRC (pat);
4053 dst = SET_DEST (pat);
4054 mode = GET_MODE (dst);
4055
4056 if (mode == SImode && hi_const (src)
4057 && REGNO (dst) != FPUL_REG)
4058 {
4059 int offset = 0;
4060
4061 mode = HImode;
4062 while (GET_CODE (dst) == SUBREG)
4063 {
4064 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4065 GET_MODE (SUBREG_REG (dst)),
4066 SUBREG_BYTE (dst),
4067 GET_MODE (dst));
4068 dst = SUBREG_REG (dst);
4069 }
4070 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4071 }
4072
4073 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4074 {
4075 /* This must be an insn that clobbers r0. */
4076 rtx clobber = XVECEXP (PATTERN (scan), 0,
4077 XVECLEN (PATTERN (scan), 0) - 1);
4078
4079 if (GET_CODE (clobber) != CLOBBER
4080 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4081 abort ();
4082
4083 if (last_float
4084 && reg_set_between_p (r0_rtx, last_float_move, scan))
4085 last_float = 0;
4086 if (last_float
4087 && TARGET_SHCOMPACT
4088 && GET_MODE_SIZE (mode) != 4
4089 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4090 last_float = 0;
4091 lab = add_constant (src, mode, last_float);
4092 if (lab)
4093 emit_insn_before (gen_mova (lab), scan);
4094 else
4095 {
4096 /* There will be a REG_UNUSED note for r0 on
4097 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4098 lest reorg:mark_target_live_regs fail to
4099 consider r0 used, and we end up with a delay
4100 slot insn in front of SCAN that clobbers r0. */
4101 rtx note
4102 = find_regno_note (last_float_move, REG_UNUSED, 0);
4103
4104 /* If we are not optimizing, then there may not be
4105 a note. */
4106 if (note)
4107 PUT_MODE (note, REG_INC);
4108
4109 *last_float_addr = r0_inc_rtx;
4110 }
4111 last_float_move = scan;
4112 last_float = src;
4113 newsrc = gen_rtx (MEM, mode,
4114 (((TARGET_SH4 && ! TARGET_FMOVD)
4115 || REGNO (dst) == FPUL_REG)
4116 ? r0_inc_rtx
4117 : r0_rtx));
4118 last_float_addr = &XEXP (newsrc, 0);
4119
4120 /* Remove the clobber of r0. */
4121 XEXP (clobber, 0) = gen_rtx_SCRATCH (Pmode);
4122 RTX_UNCHANGING_P (newsrc) = 1;
4123 }
4124 /* This is a mova needing a label. Create it. */
4125 else if (GET_CODE (src) == UNSPEC
4126 && XINT (src, 1) == UNSPEC_MOVA
4127 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4128 {
4129 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4130 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4131 newsrc = gen_rtx_UNSPEC (SImode,
4132 gen_rtvec (1, newsrc),
4133 UNSPEC_MOVA);
4134 }
4135 else
4136 {
4137 lab = add_constant (src, mode, 0);
4138 newsrc = gen_rtx_MEM (mode,
4139 gen_rtx_LABEL_REF (VOIDmode, lab));
4140 RTX_UNCHANGING_P (newsrc) = 1;
4141 }
4142 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4143 INSN_CODE (scan) = -1;
4144 }
4145 }
4146 dump_table (barrier);
4147 insn = barrier;
4148 }
4149 }
4150
4151 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4152 INSN_ADDRESSES_FREE ();
4153 split_branches (first);
4154
4155 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4156 also has an effect on the register that holds the address of the sfunc.
4157 Insert an extra dummy insn in front of each sfunc that pretends to
4158 use this register. */
4159 if (flag_delayed_branch)
4160 {
4161 for (insn = first; insn; insn = NEXT_INSN (insn))
4162 {
4163 rtx reg = sfunc_uses_reg (insn);
4164
4165 if (! reg)
4166 continue;
4167 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4168 }
4169 }
4170 #if 0
4171 /* fpscr is not actually a user variable, but we pretend it is for the
4172 sake of the previous optimization passes, since we want it handled like
4173 one. However, we don't have any debugging information for it, so turn
4174 it into a non-user variable now. */
4175 if (TARGET_SH4)
4176 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4177 #endif
4178 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4179 }
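/* For illustration only (a sketch, not verbatim output): after the
   SH_FIXUP_PCLOAD pass above, a move of a constant that does not fit in
   an immediate, say `r1 = 0x12345678', ends up as a pc-relative load
   from a literal table dumped after the next barrier:

	mov.l	.L42,r1		! pc-relative load of the constant
	...
	rts
	nop
	.align 2
   .L42:
	.long	0x12345678

   add_constant records the value, and dump_table emits the table at the
   barrier found by find_barrier.  */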
4180
4181 int
4182 get_dest_uid (label, max_uid)
4183 rtx label;
4184 int max_uid;
4185 {
4186 rtx dest = next_real_insn (label);
4187 int dest_uid;
4188 if (! dest)
4189 /* This can happen for an undefined label. */
4190 return 0;
4191 dest_uid = INSN_UID (dest);
4192 /* If this is a newly created branch redirection blocking instruction,
4193 we cannot index the branch_uid or insn_addresses arrays with its
4194 uid. But then, we won't need to, because the actual destination is
4195 the following branch. */
4196 while (dest_uid >= max_uid)
4197 {
4198 dest = NEXT_INSN (dest);
4199 dest_uid = INSN_UID (dest);
4200 }
4201 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4202 return 0;
4203 return dest_uid;
4204 }
4205
4206 /* Split condbranches that are out of range. Also add clobbers for
4207 scratch registers that are needed in far jumps.
4208 We do this before delay slot scheduling, so that it can take our
4209 newly created instructions into account. It also allows us to
4210 find branches with common targets more easily. */
4211
4212 static void
4213 split_branches (first)
4214 rtx first;
4215 {
4216 rtx insn;
4217 struct far_branch **uid_branch, *far_branch_list = 0;
4218 int max_uid = get_max_uid ();
4219
4220 /* Find out which branches are out of range. */
4221 shorten_branches (first);
4222
4223 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4224 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4225
4226 for (insn = first; insn; insn = NEXT_INSN (insn))
4227 if (! INSN_P (insn))
4228 continue;
4229 else if (INSN_DELETED_P (insn))
4230 {
4231 /* Shorten_branches would split this instruction again,
4232 so transform it into a note. */
4233 PUT_CODE (insn, NOTE);
4234 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4235 NOTE_SOURCE_FILE (insn) = 0;
4236 }
4237 else if (GET_CODE (insn) == JUMP_INSN
4238 /* Don't mess with ADDR_DIFF_VEC */
4239 && (GET_CODE (PATTERN (insn)) == SET
4240 || GET_CODE (PATTERN (insn)) == RETURN))
4241 {
4242 enum attr_type type = get_attr_type (insn);
4243 if (type == TYPE_CBRANCH)
4244 {
4245 rtx next, beyond;
4246
4247 if (get_attr_length (insn) > 4)
4248 {
4249 rtx src = SET_SRC (PATTERN (insn));
4250 rtx olabel = XEXP (XEXP (src, 1), 0);
4251 int addr = INSN_ADDRESSES (INSN_UID (insn));
4252 rtx label = 0;
4253 int dest_uid = get_dest_uid (olabel, max_uid);
4254 struct far_branch *bp = uid_branch[dest_uid];
4255
4256 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4257 the label if the LABEL_NUSES count drops to zero. There is
4258 always a jump_optimize pass that sets these values, but it
4259 proceeds to delete unreferenced code, and then if not
4260 optimizing, to un-delete the deleted instructions, thus
4261 leaving labels with use counts that are too low. */
4262 if (! optimize)
4263 {
4264 JUMP_LABEL (insn) = olabel;
4265 LABEL_NUSES (olabel)++;
4266 }
4267 if (! bp)
4268 {
4269 bp = (struct far_branch *) alloca (sizeof *bp);
4270 uid_branch[dest_uid] = bp;
4271 bp->prev = far_branch_list;
4272 far_branch_list = bp;
4273 bp->far_label
4274 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4275 LABEL_NUSES (bp->far_label)++;
4276 }
4277 else
4278 {
4279 label = bp->near_label;
4280 if (! label && bp->address - addr >= CONDJUMP_MIN)
4281 {
4282 rtx block = bp->insert_place;
4283
4284 if (GET_CODE (PATTERN (block)) == RETURN)
4285 block = PREV_INSN (block);
4286 else
4287 block = gen_block_redirect (block,
4288 bp->address, 2);
4289 label = emit_label_after (gen_label_rtx (),
4290 PREV_INSN (block));
4291 bp->near_label = label;
4292 }
4293 else if (label && ! NEXT_INSN (label))
4294 {
4295 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4296 bp->insert_place = insn;
4297 else
4298 gen_far_branch (bp);
4299 }
4300 }
4301 if (! label
4302 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4303 {
4304 bp->near_label = label = gen_label_rtx ();
4305 bp->insert_place = insn;
4306 bp->address = addr;
4307 }
4308 if (! redirect_jump (insn, label, 1))
4309 abort ();
4310 }
4311 else
4312 {
4313 /* get_attr_length (insn) == 2 */
4314 /* Check if we have a pattern where reorg wants to redirect
4315 the branch to a label from an unconditional branch that
4316 is too far away. */
4317 /* We can't use JUMP_LABEL here because it might be undefined
4318 when not optimizing. */
4319 /* A syntax error might cause beyond to be NULL_RTX. */
4320 beyond
4321 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4322 0));
4323
4324 if (beyond
4325 && (GET_CODE (beyond) == JUMP_INSN
4326 || ((beyond = next_active_insn (beyond))
4327 && GET_CODE (beyond) == JUMP_INSN))
4328 && GET_CODE (PATTERN (beyond)) == SET
4329 && recog_memoized (beyond) == CODE_FOR_jump_compact
4330 && ((INSN_ADDRESSES
4331 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4332 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4333 > 252 + 258 + 2))
4334 gen_block_redirect (beyond,
4335 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4336 }
4337
4338 next = next_active_insn (insn);
4339
4340 if ((GET_CODE (next) == JUMP_INSN
4341 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
4342 && GET_CODE (PATTERN (next)) == SET
4343 && recog_memoized (next) == CODE_FOR_jump_compact
4344 && ((INSN_ADDRESSES
4345 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4346 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4347 > 252 + 258 + 2))
4348 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4349 }
4350 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4351 {
4352 int addr = INSN_ADDRESSES (INSN_UID (insn));
4353 rtx far_label = 0;
4354 int dest_uid = 0;
4355 struct far_branch *bp;
4356
4357 if (type == TYPE_JUMP)
4358 {
4359 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4360 dest_uid = get_dest_uid (far_label, max_uid);
4361 if (! dest_uid)
4362 {
4363 /* Parse errors can lead to labels outside
4364 the insn stream. */
4365 if (! NEXT_INSN (far_label))
4366 continue;
4367
4368 if (! optimize)
4369 {
4370 JUMP_LABEL (insn) = far_label;
4371 LABEL_NUSES (far_label)++;
4372 }
4373 redirect_jump (insn, NULL_RTX, 1);
4374 far_label = 0;
4375 }
4376 }
4377 bp = uid_branch[dest_uid];
4378 if (! bp)
4379 {
4380 bp = (struct far_branch *) alloca (sizeof *bp);
4381 uid_branch[dest_uid] = bp;
4382 bp->prev = far_branch_list;
4383 far_branch_list = bp;
4384 bp->near_label = 0;
4385 bp->far_label = far_label;
4386 if (far_label)
4387 LABEL_NUSES (far_label)++;
4388 }
4389 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4390 if (addr - bp->address <= CONDJUMP_MAX)
4391 emit_label_after (bp->near_label, PREV_INSN (insn));
4392 else
4393 {
4394 gen_far_branch (bp);
4395 bp->near_label = 0;
4396 }
4397 else
4398 bp->near_label = 0;
4399 bp->address = addr;
4400 bp->insert_place = insn;
4401 if (! far_label)
4402 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4403 else
4404 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4405 }
4406 }
4407 /* Generate all pending far branches,
4408 and free our references to the far labels. */
4409 while (far_branch_list)
4410 {
4411 if (far_branch_list->near_label
4412 && ! NEXT_INSN (far_branch_list->near_label))
4413 gen_far_branch (far_branch_list);
4414 if (optimize
4415 && far_branch_list->far_label
4416 && ! --LABEL_NUSES (far_branch_list->far_label))
4417 delete_insn (far_branch_list->far_label);
4418 far_branch_list = far_branch_list->prev;
4419 }
4420
4421 /* Instruction length information is no longer valid due to the new
4422 instructions that have been generated. */
4423 init_insn_lengths ();
4424 }
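/* For illustration only (a sketch): a conditional branch whose target is
   beyond the bt/bf displacement range is not emitted directly.  Instead,
   split_branches redirects it to a nearby label behind which
   gen_far_branch places an unconditional jump to the real target,
   roughly

	bt	L_near		! short conditional branch, in range
	...
   L_near:
	bra	L_far		! unconditional branch reaches further
	nop
*/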
4425
4426 /* Dump out instruction addresses, which is useful for debugging the
4427 constant pool table stuff.
4428
4429 If relaxing, output the labels and pseudo-ops used to link together
4430 calls and the insns which set the registers they use. */
4431
4432 /* ??? The addresses printed by this routine for insns are nonsense for
4433 insns which are inside of a sequence where none of the inner insns have
4434 variable length. This is because the second pass of shorten_branches
4435 does not bother to update them. */
4436
4437 void
4438 final_prescan_insn (insn, opvec, noperands)
4439 rtx insn;
4440 rtx *opvec ATTRIBUTE_UNUSED;
4441 int noperands ATTRIBUTE_UNUSED;
4442 {
4443 if (TARGET_DUMPISIZE)
4444 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4445
4446 if (TARGET_RELAX)
4447 {
4448 rtx note;
4449
4450 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4451 if (note)
4452 {
4453 rtx pattern;
4454
4455 pattern = PATTERN (insn);
4456 if (GET_CODE (pattern) == PARALLEL)
4457 pattern = XVECEXP (pattern, 0, 0);
4458 if (GET_CODE (pattern) == CALL
4459 || (GET_CODE (pattern) == SET
4460 && (GET_CODE (SET_SRC (pattern)) == CALL
4461 || get_attr_type (insn) == TYPE_SFUNC)))
4462 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4463 CODE_LABEL_NUMBER (XEXP (note, 0)));
4464 else if (GET_CODE (pattern) == SET)
4465 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4466 CODE_LABEL_NUMBER (XEXP (note, 0)));
4467 else
4468 abort ();
4469 }
4470 }
4471 }
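/* For illustration only (a rough sketch of -mrelax output, not verbatim):
   the label is emitted at the insn that loads the call target, and a
   .uses pseudo-op is emitted at each call through that register:

   L123:
	mov.l	.L_foo,r1	! r1 <- address of foo
	...
	.uses	L123
	jsr	@r1
	nop

   Given both, the linker may relax the jsr into a bsr and drop the load
   and, possibly, the constant pool entry.  */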
4472
4473 /* Dump out any constants accumulated in the final pass. These will
4474 only be labels. */
4475
4476 const char *
4477 output_jump_label_table ()
4478 {
4479 int i;
4480
4481 if (pool_size)
4482 {
4483 fprintf (asm_out_file, "\t.align 2\n");
4484 for (i = 0; i < pool_size; i++)
4485 {
4486 pool_node *p = &pool_vector[i];
4487
4488 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4489 CODE_LABEL_NUMBER (p->label));
4490 output_asm_insn (".long %O0", &p->value);
4491 }
4492 pool_size = 0;
4493 }
4494
4495 return "";
4496 }
4497 \f
4498 /* A full frame looks like:
4499
4500 arg-5
4501 arg-4
4502 [ if current_function_anonymous_args
4503 arg-3
4504 arg-2
4505 arg-1
4506 arg-0 ]
4507 saved-fp
4508 saved-r10
4509 saved-r11
4510 saved-r12
4511 saved-pr
4512 local-n
4513 ..
4514 local-1
4515 local-0 <- fp points here. */
4516
4517 /* Number of bytes pushed for anonymous args, used to pass information
4518 between expand_prologue and expand_epilogue. */
4519
4520 static int extra_push;
4521
4522 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register
4523 to be adjusted, and TEMP, if nonnegative, holds the register number
4524 of a general register that we may clobber. */
4525
4526 static void
4527 output_stack_adjust (size, reg, temp, emit_fn)
4528 int size;
4529 rtx reg;
4530 int temp;
4531 rtx (*emit_fn) PARAMS ((rtx));
4532 {
4533 if (size)
4534 {
4535 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4536
4537 if (size % align)
4538 abort ();
4539
4540 if (CONST_OK_FOR_ADD (size))
4541 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4542 /* Try to do it with two partial adjustments; however, we must make
4543 sure that the stack is properly aligned at all times, in case
4544 an interrupt occurs between the two partial adjustments. */
4545 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4546 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4547 {
4548 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4549 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4550 }
4551 else
4552 {
4553 rtx const_reg;
4554 rtx insn;
4555
4556 /* If TEMP is invalid, we could temporarily save a general
4557 register to MACL. However, there is currently no need
4558 to handle this case, so just abort when we see it. */
4559 if (temp < 0)
4560 abort ();
4561 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4562
4563 /* If SIZE is negative, subtract the positive value.
4564 This sometimes allows a constant pool entry to be shared
4565 between prologue and epilogue code. */
4566 if (size < 0)
4567 {
4568 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4569 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4570 }
4571 else
4572 {
4573 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4574 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4575 }
4576 if (emit_fn == frame_insn)
4577 REG_NOTES (insn)
4578 = (gen_rtx_EXPR_LIST
4579 (REG_FRAME_RELATED_EXPR,
4580 gen_rtx_SET (VOIDmode, reg,
4581 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4582 REG_NOTES (insn)));
4583 }
4584 }
4585 }
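/* Worked example for the two-part adjustment above (illustrative,
   assuming a 4-byte stack alignment and a signed 8-bit add immediate
   range): for SIZE == -200, CONST_OK_FOR_ADD fails, but size / 2 & -align
   is -100 and size - (size / 2 & -align) is -100, so the adjustment
   becomes two add #-100 insns and the stack stays 4-byte aligned in
   between.  */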
4586
4587 static rtx
4588 frame_insn (x)
4589 rtx x;
4590 {
4591 x = emit_insn (x);
4592 RTX_FRAME_RELATED_P (x) = 1;
4593 return x;
4594 }
4595
4596 /* Output RTL to push register RN onto the stack. */
4597
4598 static rtx
4599 push (rn)
4600 int rn;
4601 {
4602 rtx x;
4603 if (rn == FPUL_REG)
4604 x = gen_push_fpul ();
4605 else if (rn == FPSCR_REG)
4606 x = gen_push_fpscr ();
4607 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4608 && FP_OR_XD_REGISTER_P (rn))
4609 {
4610 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4611 return NULL_RTX;
4612 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4613 }
4614 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4615 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4616 else
4617 x = gen_push (gen_rtx_REG (SImode, rn));
4618
4619 x = frame_insn (x);
4620 REG_NOTES (x)
4621 = gen_rtx_EXPR_LIST (REG_INC,
4622 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4623 return x;
4624 }
4625
4626 /* Output RTL to pop register RN from the stack. */
4627
4628 static void
4629 pop (rn)
4630 int rn;
4631 {
4632 rtx x;
4633 if (rn == FPUL_REG)
4634 x = gen_pop_fpul ();
4635 else if (rn == FPSCR_REG)
4636 x = gen_pop_fpscr ();
4637 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4638 && FP_OR_XD_REGISTER_P (rn))
4639 {
4640 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4641 return;
4642 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
4643 }
4644 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4645 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
4646 else
4647 x = gen_pop (gen_rtx_REG (SImode, rn));
4648
4649 x = emit_insn (x);
4650 REG_NOTES (x)
4651 = gen_rtx_EXPR_LIST (REG_INC,
4652 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4653 }
4654
4655 /* Generate code to push the regs specified in the mask. */
4656
4657 static void
4658 push_regs (mask, interrupt_handler)
4659 HARD_REG_SET *mask;
4660 int interrupt_handler;
4661 {
4662 int i;
4663 int skip_fpscr = 0;
4664
4665 /* Push PR last; this gives better latencies after the prologue, and
4666 candidates for the return delay slot when there are no general
4667 registers pushed. */
4668 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4669 {
4670 /* If this is an interrupt handler, and the SZ bit varies,
4671 and we have to push any floating point register, we need
4672 to switch to the correct precision first. */
4673 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
4674 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
4675 {
4676 HARD_REG_SET unsaved;
4677
4678 push (FPSCR_REG);
4679 COMPL_HARD_REG_SET(unsaved, *mask);
4680 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
4681 skip_fpscr = 1;
4682 }
4683 if (i != PR_REG
4684 && (i != FPSCR_REG || ! skip_fpscr)
4685 && TEST_HARD_REG_BIT (*mask, i))
4686 push (i);
4687 }
4688 if (TEST_HARD_REG_BIT (*mask, PR_REG))
4689 push (PR_REG);
4690 }
4691
4692 /* Work out the registers which need to be saved, both as a mask and a
4693 count of saved words. Return the count.
4694
4695 If doing a pragma interrupt function, then push all regs used by the
4696 function, and if we call another function (we can tell by looking at PR),
4697 make sure that all the regs it clobbers are safe too. */
4698
4699 static int
4700 calc_live_regs (live_regs_mask)
4701 HARD_REG_SET *live_regs_mask;
4702 {
4703 int reg;
4704 int count;
4705 int interrupt_handler;
4706 int pr_live;
4707
4708 interrupt_handler = sh_cfun_interrupt_handler_p ();
4709
4710 for (count = 0; 32 * count < FIRST_PSEUDO_REGISTER; count++)
4711 CLEAR_HARD_REG_SET (*live_regs_mask);
4712 if (TARGET_SH4 && TARGET_FMOVD && interrupt_handler
4713 && regs_ever_live[FPSCR_REG])
4714 target_flags &= ~FPU_SINGLE_BIT;
4715 /* If switching to double mode would save a lot of register saves, do that. */
4716 else if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
4717 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
4718 if (regs_ever_live[reg] && regs_ever_live[reg+1]
4719 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
4720 && ++count > 2)
4721 {
4722 target_flags &= ~FPU_SINGLE_BIT;
4723 break;
4724 }
4725 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
4726 knows how to use it. That means the pseudo originally allocated for
4727 the initial value can become the PR_MEDIA_REG hard register, as seen for
4728 execute/20010122-1.c:test9. */
4729 if (TARGET_SHMEDIA)
4730 pr_live = regs_ever_live[PR_MEDIA_REG];
4731 else
4732 {
4733 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
4734 pr_live = (pr_initial
4735 ? (GET_CODE (pr_initial) != REG
4736 || REGNO (pr_initial) != (PR_REG))
4737 : regs_ever_live[PR_REG]);
4738 }
4739 /* Force PR to be live if the prologue has to call the SHmedia
4740 argument decoder or register saver. */
4741 if (TARGET_SHCOMPACT
4742 && ((current_function_args_info.call_cookie
4743 & ~ CALL_COOKIE_RET_TRAMP (1))
4744 || current_function_has_nonlocal_label))
4745 pr_live = 1;
4746 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
4747 {
4748 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
4749 ? pr_live
4750 : (interrupt_handler && ! pragma_trapa)
4751 ? (/* Need to save all the regs ever live. */
4752 (regs_ever_live[reg]
4753 || (call_used_regs[reg]
4754 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
4755 && pr_live))
4756 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
4757 && reg != RETURN_ADDRESS_POINTER_REGNUM
4758 && reg != T_REG && reg != GBR_REG
4759 /* Push fpscr only on targets which have an FPU. */
4760 && (reg != FPSCR_REG || TARGET_FPU_ANY))
4761 : (/* Only push those regs which are used and need to be saved. */
4762 (TARGET_SHCOMPACT
4763 && flag_pic
4764 && current_function_args_info.call_cookie
4765 && reg == PIC_OFFSET_TABLE_REGNUM)
4766 || (regs_ever_live[reg] && ! call_used_regs[reg])
4767 || (current_function_calls_eh_return
4768 && (reg == EH_RETURN_DATA_REGNO (0)
4769 || reg == EH_RETURN_DATA_REGNO (1)
4770 || reg == EH_RETURN_DATA_REGNO (2)
4771 || reg == EH_RETURN_DATA_REGNO (3)))))
4772 {
4773 SET_HARD_REG_BIT (*live_regs_mask, reg);
4774 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
4775
4776 if ((TARGET_SH4 || TARGET_SH5) && TARGET_FMOVD
4777 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
4778 {
4779 if (FP_REGISTER_P (reg))
4780 {
4781 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
4782 {
4783 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
4784 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
4785 }
4786 }
4787 else if (XD_REGISTER_P (reg))
4788 {
4789 /* Must switch to double mode to access these registers. */
4790 target_flags &= ~FPU_SINGLE_BIT;
4791 }
4792 }
4793 }
4794 }
4795
4796 return count;
4797 }
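/* Example (illustrative): for a #pragma interrupt handler that itself
   makes a call, pr_live is set, so the mask built above also includes
   call-clobbered registers such as MACH and MACL, which an ordinary
   function would never save.  */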
4798
4799 /* Code to generate prologue and epilogue sequences */
4800
4801 /* PUSHED is the number of bytes that are being pushed on the
4802 stack for register saves. Return the frame size, padded
4803 appropriately so that the stack stays properly aligned. */
4804 static HOST_WIDE_INT
4805 rounded_frame_size (pushed)
4806 int pushed;
4807 {
4808 HOST_WIDE_INT size = get_frame_size ();
4809 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4810
4811 return ((size + pushed + align - 1) & -align) - pushed;
4812 }
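/* Example (illustrative, assuming an 8-byte stack alignment): with a
   frame size of 22 and PUSHED == 12, ((22 + 12 + 7) & -8) - 12 == 28, so
   the locals get 28 bytes and the total adjustment of 28 + 12 == 40
   bytes keeps the stack 8-byte aligned.  */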
4813
4814 /* Choose a call-clobbered target-branch register that remains
4815 unchanged along the whole function. We set it up as the return
4816 value in the prologue. */
4817 int
4818 sh_media_register_for_return ()
4819 {
4820 int regno;
4821 int tr0_used;
4822
4823 if (! current_function_is_leaf)
4824 return -1;
4825
4826 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
4827
4828 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
4829 if (call_used_regs[regno] && ! regs_ever_live[regno])
4830 return regno;
4831
4832 return -1;
4833 }
4834
4835 void
4836 sh_expand_prologue ()
4837 {
4838 HARD_REG_SET live_regs_mask;
4839 int d, i;
4840 int d_rounding = 0;
4841 int save_flags = target_flags;
4842
4843 current_function_interrupt = sh_cfun_interrupt_handler_p ();
4844
4845 /* We have pretend args if we had an object sent partially in registers
4846 and partially on the stack, e.g. a large structure. */
4847 output_stack_adjust (-current_function_pretend_args_size
4848 - current_function_args_info.stack_regs * 8,
4849 stack_pointer_rtx, TARGET_SH5 ? 0 : 1, frame_insn);
4850
4851 extra_push = 0;
4852
4853 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
4854 /* We're going to use the PIC register to load the address of the
4855 incoming-argument decoder and/or of the return trampoline from
4856 the GOT, so make sure the PIC register is preserved and
4857 initialized. */
4858 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
4859
4860 if (TARGET_SHCOMPACT
4861 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
4862 {
4863 int reg;
4864
4865 /* First, make all registers with incoming arguments that will
4866 be pushed onto the stack live, so that register renaming
4867 doesn't overwrite them. */
4868 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
4869 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
4870 >= NPARM_REGS (SImode) - reg)
4871 for (; reg < NPARM_REGS (SImode); reg++)
4872 emit_insn (gen_shcompact_preserve_incoming_args
4873 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
4874 else if (CALL_COOKIE_INT_REG_GET
4875 (current_function_args_info.call_cookie, reg) == 1)
4876 emit_insn (gen_shcompact_preserve_incoming_args
4877 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
4878
4879 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
4880 stack_pointer_rtx);
4881 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
4882 GEN_INT (current_function_args_info.call_cookie));
4883 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
4884 gen_rtx_REG (SImode, R0_REG));
4885 }
4886 else if (TARGET_SHMEDIA)
4887 {
4888 int tr = sh_media_register_for_return ();
4889
4890 if (tr >= 0)
4891 {
4892 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
4893 gen_rtx_REG (DImode, PR_MEDIA_REG));
4894
4895 /* If this function only exits with sibcalls, this copy
4896 will be flagged as dead. */
4897 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
4898 const0_rtx,
4899 REG_NOTES (insn));
4900 }
4901 }
4902
4903 /* Emit the code for SETUP_VARARGS. */
4904 if (current_function_stdarg)
4905 {
4906 /* This is not used by the SH2E calling convention */
4907 if (TARGET_SH1 && ! TARGET_SH2E && ! TARGET_SH5 && ! TARGET_HITACHI)
4908 {
4909 /* Push arg regs as if they'd been provided by the caller on the stack. */
4910 for (i = 0; i < NPARM_REGS(SImode); i++)
4911 {
4912 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
4913 rtx insn;
4914
4915 if (i >= (NPARM_REGS(SImode)
4916 - current_function_args_info.arg_count[(int) SH_ARG_INT]
4917 ))
4918 break;
4919 insn = push (rn);
4920 RTX_FRAME_RELATED_P (insn) = 0;
4921 extra_push += 4;
4922 }
4923 }
4924 }
4925
4926 /* If we're supposed to switch stacks at function entry, do so now. */
4927 if (sp_switch)
4928 emit_insn (gen_sp_switch_1 ());
4929
4930 d = calc_live_regs (&live_regs_mask);
4931 /* ??? Maybe we could save some switching if we can move a mode switch
4932 that already happens to be at the function start into the prologue. */
4933 if (target_flags != save_flags && ! current_function_interrupt)
4934 emit_insn (gen_toggle_sz ());
4935
4936 if (TARGET_SH5)
4937 {
4938 int i;
4939 int offset;
4940 int align;
4941 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
4942 int offset_in_r0 = -1;
4943 int sp_in_r0 = 0;
4944
4945 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
4946 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
4947 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
4948
4949 offset = d + d_rounding;
4950 output_stack_adjust (-offset, stack_pointer_rtx, 1, frame_insn);
4951
4952 /* We loop twice: first, we save the 8-byte aligned registers at the
4953 higher addresses, which are known to be aligned. Then we
4954 proceed to save the 32-bit registers that don't need 8-byte
4955 alignment. */
4956 /* Note that if you change this code in a way that affects where
4957 the return register is saved, you have to update not only
4958 sh_expand_epilogue, but also sh_set_return_address. */
4959 for (align = 1; align >= 0; align--)
4960 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
4961 if (TEST_HARD_REG_BIT (live_regs_mask, i))
4962 {
4963 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
4964 int reg = i;
4965 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
4966
4967 if (mode == SFmode && (i % 2) == 1
4968 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
4969 && (TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1))))
4970 {
4971 mode = DFmode;
4972 i--;
4973 reg--;
4974 }
4975
4976 /* If we're doing the aligned pass and this is not aligned,
4977 or we're doing the unaligned pass and this is aligned,
4978 skip it. */
4979 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
4980 == 0) != align)
4981 continue;
4982
4983 offset -= GET_MODE_SIZE (mode);
4984
4985 reg_rtx = gen_rtx_REG (mode, reg);
4986
4987 mem_rtx = gen_rtx_MEM (mode,
4988 gen_rtx_PLUS (Pmode,
4989 stack_pointer_rtx,
4990 GEN_INT (offset)));
4991
4992 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
4993
4994 mem_rtx = NULL_RTX;
4995
4996 try_pre_dec:
4997 do
4998 if (HAVE_PRE_DECREMENT
4999 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5000 || mem_rtx == NULL_RTX
5001 || i == PR_REG || SPECIAL_REGISTER_P (i)))
5002 {
5003 pre_dec = gen_rtx_MEM (mode,
5004 gen_rtx_PRE_DEC (Pmode, r0));
5005
5006 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5007 pre_dec_ok);
5008
5009 pre_dec = NULL_RTX;
5010
5011 break;
5012
5013 pre_dec_ok:
5014 mem_rtx = NULL_RTX;
5015 offset += GET_MODE_SIZE (mode);
5016 }
5017 while (0);
5018
5019 if (mem_rtx != NULL_RTX)
5020 goto addr_ok;
5021
5022 if (offset_in_r0 == -1)
5023 {
5024 emit_move_insn (r0, GEN_INT (offset));
5025 offset_in_r0 = offset;
5026 }
5027 else if (offset != offset_in_r0)
5028 {
5029 emit_move_insn (r0,
5030 gen_rtx_PLUS
5031 (Pmode, r0,
5032 GEN_INT (offset - offset_in_r0)));
5033 offset_in_r0 += offset - offset_in_r0;
5034 }
5035
5036 if (pre_dec != NULL_RTX)
5037 {
5038 if (! sp_in_r0)
5039 {
5040 emit_move_insn (r0,
5041 gen_rtx_PLUS
5042 (Pmode, r0, stack_pointer_rtx));
5043 sp_in_r0 = 1;
5044 }
5045
5046 offset -= GET_MODE_SIZE (mode);
5047 offset_in_r0 -= GET_MODE_SIZE (mode);
5048
5049 mem_rtx = pre_dec;
5050 }
5051 else if (sp_in_r0)
5052 mem_rtx = gen_rtx_MEM (mode, r0);
5053 else
5054 mem_rtx = gen_rtx_MEM (mode,
5055 gen_rtx_PLUS (Pmode,
5056 stack_pointer_rtx,
5057 r0));
5058
5059 /* We must not use an r0-based address for target-branch
5060 registers or for special registers without pre-dec
5061 memory addresses, since we store their values in r0
5062 first. */
5063 if (TARGET_REGISTER_P (i)
5064 || ((i == PR_REG || SPECIAL_REGISTER_P (i))
5065 && mem_rtx != pre_dec))
5066 abort ();
5067
5068 addr_ok:
5069 if (TARGET_REGISTER_P (i)
5070 || ((i == PR_REG || SPECIAL_REGISTER_P (i))
5071 && mem_rtx != pre_dec))
5072 {
5073 rtx r0mode = gen_rtx_REG (GET_MODE (reg_rtx), R0_REG);
5074
5075 emit_move_insn (r0mode, reg_rtx);
5076
5077 offset_in_r0 = -1;
5078 sp_in_r0 = 0;
5079
5080 reg_rtx = r0mode;
5081 }
5082
5083 emit_move_insn (mem_rtx, reg_rtx);
5084 }
5085
5086 if (offset != d_rounding)
5087 abort ();
5088 }
5089 else
5090 push_regs (&live_regs_mask, current_function_interrupt);
5091
5092 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5093 {
5094 rtx insn = get_last_insn ();
5095 rtx last = emit_insn (gen_GOTaddr2picreg ());
5096
5097 /* Mark these insns as possibly dead. Sometimes, flow2 may
5098 delete all uses of the PIC register. In this case, let it
5099 delete the initialization too. */
5100 do
5101 {
5102 insn = NEXT_INSN (insn);
5103
5104 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5105 const0_rtx,
5106 REG_NOTES (insn));
5107 }
5108 while (insn != last);
5109 }
5110
5111 if (SHMEDIA_REGS_STACK_ADJUST ())
5112 {
5113 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5114 function_symbol (TARGET_FPU_ANY
5115 ? "__GCC_push_shmedia_regs"
5116 : "__GCC_push_shmedia_regs_nofpu"));
5117 /* This must NOT go through the PLT, otherwise mach and macl
5118 may be clobbered. */
5119 emit_insn (gen_shmedia_save_restore_regs_compact
5120 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5121 }
5122
5123 if (target_flags != save_flags && ! current_function_interrupt)
5124 {
5125 rtx insn = emit_insn (gen_toggle_sz ());
5126
5127 /* If we're lucky, a mode switch in the function body will
5128 overwrite fpscr, turning this insn dead. Tell flow this
5129 insn is ok to delete. */
5130 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5131 const0_rtx,
5132 REG_NOTES (insn));
5133 }
5134
5135 target_flags = save_flags;
5136
5137 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5138 stack_pointer_rtx, TARGET_SH5 ? 0 : 1, frame_insn);
5139
5140 if (frame_pointer_needed)
5141 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5142
5143 if (TARGET_SHCOMPACT
5144 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5145 {
5146 /* This must NOT go through the PLT, otherwise mach and macl
5147 may be clobbered. */
5148 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5149 function_symbol ("__GCC_shcompact_incoming_args"));
5150 emit_insn (gen_shcompact_incoming_args ());
5151 }
5152 }
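/* For illustration only (a rough sketch, not verbatim output): a simple
   non-SH5, non-PIC function with one callee-saved register, some locals
   and a frame pointer gets a prologue along the lines of

	mov.l	r14,@-r15	! pushes found by calc_live_regs
	sts.l	pr,@-r15	! PR is pushed last
	add	#-LOCALS,r15	! rounded_frame_size bytes of locals
	mov	r15,r14		! only if frame_pointer_needed
*/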
5153
5154 void
5155 sh_expand_epilogue ()
5156 {
5157 HARD_REG_SET live_regs_mask;
5158 int d, i;
5159 int d_rounding = 0;
5160
5161 int save_flags = target_flags;
5162 int frame_size;
5163 int fpscr_deferred = 0;
5164
5165 d = calc_live_regs (&live_regs_mask);
5166
5167 if (TARGET_SH5 && d % (STACK_BOUNDARY / BITS_PER_UNIT))
5168 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5169 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5170
5171 frame_size = rounded_frame_size (d) - d_rounding;
5172
5173 if (frame_pointer_needed)
5174 {
5175 output_stack_adjust (frame_size, frame_pointer_rtx, 7, emit_insn);
5176
5177 /* We must avoid moving the stack pointer adjustment past code
5178 which reads from the local frame, else an interrupt could
5179 occur after the SP adjustment and clobber data in the local
5180 frame. */
5181 emit_insn (gen_blockage ());
5182 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5183 }
5184 else if (frame_size)
5185 {
5186 /* We must avoid moving the stack pointer adjustment past code
5187 which reads from the local frame, else an interrupt could
5188 occur after the SP adjustment and clobber data in the local
5189 frame. */
5190 emit_insn (gen_blockage ());
5191 output_stack_adjust (frame_size, stack_pointer_rtx, 7, emit_insn);
5192 }
5193
5194 if (SHMEDIA_REGS_STACK_ADJUST ())
5195 {
5196 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5197 function_symbol (TARGET_FPU_ANY
5198 ? "__GCC_pop_shmedia_regs"
5199 : "__GCC_pop_shmedia_regs_nofpu"));
5200 /* This must NOT go through the PLT, otherwise mach and macl
5201 may be clobbered. */
5202 emit_insn (gen_shmedia_save_restore_regs_compact
5203 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5204 }
5205
5206 /* Pop all the registers. */
5207
5208 if (target_flags != save_flags && ! current_function_interrupt)
5209 emit_insn (gen_toggle_sz ());
5210 if (TARGET_SH5)
5211 {
5212 int offset = d_rounding;
5213 int offset_in_r0 = -1;
5214 int sp_in_r0 = 0;
5215 int align;
5216 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5217 int tmp_regno = R20_REG;
5218
5219 /* We loop twice, in the reverse order of the prologue saves: first we
5220 restore the 32-bit registers that don't need 8-byte alignment,
5221 then the 8-byte aligned registers that live at the higher
5222 addresses. */
5223 for (align = 0; align <= 1; align++)
5224 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5225 if (TEST_HARD_REG_BIT (live_regs_mask, i))
5226 {
5227 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5228 int reg = i;
5229 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5230
5231 if (mode == SFmode && (i % 2) == 0
5232 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5233 && (TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1))))
5234 {
5235 mode = DFmode;
5236 i++;
5237 }
5238
5239 /* If we're doing the aligned pass and this is not aligned,
5240 or we're doing the unaligned pass and this is aligned,
5241 skip it. */
5242 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
5243 == 0) != align)
5244 continue;
5245
5246 reg_rtx = gen_rtx_REG (mode, reg);
5247
5248 mem_rtx = gen_rtx_MEM (mode,
5249 gen_rtx_PLUS (Pmode,
5250 stack_pointer_rtx,
5251 GEN_INT (offset)));
5252
5253 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5254
5255 mem_rtx = NULL_RTX;
5256
5257 try_post_inc:
5258 do
5259 if (HAVE_POST_INCREMENT
5260 && (offset == offset_in_r0
5261 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5262 && mem_rtx == NULL_RTX)
5263 || i == PR_REG || SPECIAL_REGISTER_P (i)))
5264 {
5265 post_inc = gen_rtx_MEM (mode,
5266 gen_rtx_POST_INC (Pmode, r0));
5267
5268 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5269 post_inc_ok);
5270
5271 post_inc = NULL_RTX;
5272
5273 break;
5274
5275 post_inc_ok:
5276 mem_rtx = NULL_RTX;
5277 }
5278 while (0);
5279
5280 if (mem_rtx != NULL_RTX)
5281 goto addr_ok;
5282
5283 if (offset_in_r0 == -1)
5284 {
5285 emit_move_insn (r0, GEN_INT (offset));
5286 offset_in_r0 = offset;
5287 }
5288 else if (offset != offset_in_r0)
5289 {
5290 emit_move_insn (r0,
5291 gen_rtx_PLUS
5292 (Pmode, r0,
5293 GEN_INT (offset - offset_in_r0)));
5294 offset_in_r0 += offset - offset_in_r0;
5295 }
5296
5297 if (post_inc != NULL_RTX)
5298 {
5299 if (! sp_in_r0)
5300 {
5301 emit_move_insn (r0,
5302 gen_rtx_PLUS
5303 (Pmode, r0, stack_pointer_rtx));
5304 sp_in_r0 = 1;
5305 }
5306
5307 mem_rtx = post_inc;
5308
5309 offset_in_r0 += GET_MODE_SIZE (mode);
5310 }
5311 else if (sp_in_r0)
5312 mem_rtx = gen_rtx_MEM (mode, r0);
5313 else
5314 mem_rtx = gen_rtx_MEM (mode,
5315 gen_rtx_PLUS (Pmode,
5316 stack_pointer_rtx,
5317 r0));
5318
5319 if ((i == PR_REG || SPECIAL_REGISTER_P (i))
5320 && mem_rtx != post_inc)
5321 abort ();
5322
5323 addr_ok:
5324 if ((i == PR_REG || SPECIAL_REGISTER_P (i))
5325 && mem_rtx != post_inc)
5326 {
5327 insn = emit_move_insn (r0, mem_rtx);
5328 mem_rtx = r0;
5329 }
5330 else if (TARGET_REGISTER_P (i))
5331 {
5332 rtx tmp_reg = gen_rtx_REG (mode, tmp_regno);
5333
5334 /* Give the scheduler a bit of freedom by using R20..R23
5335 in a round-robin fashion. Don't use R1 here because
5336 we want to use it for EH_RETURN_STACKADJ_RTX. */
5337 insn = emit_move_insn (tmp_reg, mem_rtx);
5338 mem_rtx = tmp_reg;
5339 if (++tmp_regno > R23_REG)
5340 tmp_regno = R20_REG;
5341 }
5342
5343 insn = emit_move_insn (reg_rtx, mem_rtx);
5344
5345 offset += GET_MODE_SIZE (mode);
5346 }
5347
5348 if (offset != d + d_rounding)
5349 abort ();
5350
5351 goto finish;
5352 }
5353 else
5354 d = 0;
5355 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5356 pop (PR_REG);
5357 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5358 {
5359 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
5360
5361 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
5362 && hard_regs_intersect_p (&live_regs_mask,
5363 &reg_class_contents[DF_REGS]))
5364 fpscr_deferred = 1;
5365 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
5366 pop (j);
5367 if (j == FIRST_FP_REG && fpscr_deferred)
5368 pop (FPSCR_REG);
5369 }
5370 finish:
5371 if (target_flags != save_flags && ! current_function_interrupt)
5372 emit_insn (gen_toggle_sz ());
5373 target_flags = save_flags;
5374
5375 output_stack_adjust (extra_push + current_function_pretend_args_size
5376 + d + d_rounding
5377 + current_function_args_info.stack_regs * 8,
5378 stack_pointer_rtx, 7, emit_insn);
5379
5380 if (current_function_calls_eh_return)
5381 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
5382 EH_RETURN_STACKADJ_RTX));
5383
5384 /* Switch back to the normal stack if necessary. */
5385 if (sp_switch)
5386 emit_insn (gen_sp_switch_2 ());
5387
5388 /* Tell flow the insn that pops PR isn't dead. */
5389 /* PR_REG will never be live in SHmedia mode, and we don't need to
5390 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
5391 by the return pattern. */
5392 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5393 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
5394 }
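/* For illustration only (a rough sketch matching the prologue sketch
   above): the corresponding epilogue restores in the opposite order,

	add	#LOCALS,r15	! deallocate locals (done via the frame
				!  pointer when one is used)
	lds.l	@r15+,pr	! PR is popped first
	mov.l	@r15+,r14
	rts			! the return itself comes from the rtl
	nop			!  return pattern, not from this function
*/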
5395
5396 static int sh_need_epilogue_known = 0;
5397
5398 int
5399 sh_need_epilogue ()
5400 {
5401 if (! sh_need_epilogue_known)
5402 {
5403 rtx epilogue;
5404
5405 start_sequence ();
5406 sh_expand_epilogue ();
5407 epilogue = get_insns ();
5408 end_sequence ();
5409 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
5410 }
5411 return sh_need_epilogue_known > 0;
5412 }
5413
5414 /* Emit code to change the current function's return address to RA.
5415 TEMP is available as a scratch register, if needed. */
5416
5417 void
5418 sh_set_return_address (ra, tmp)
5419 rtx ra, tmp;
5420 {
5421 HARD_REG_SET live_regs_mask;
5422 int d;
5423 int d_rounding = 0;
5424 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5425 int pr_offset;
5426
5427 d = calc_live_regs (&live_regs_mask);
5428
5429 /* If pr_reg isn't live, we can set it (or the register given in
5430 sh_media_register_for_return) directly. */
5431 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
5432 {
5433 rtx rr;
5434
5435 if (TARGET_SHMEDIA)
5436 {
5437 int rr_regno = sh_media_register_for_return ();
5438
5439 if (rr_regno < 0)
5440 rr_regno = pr_reg;
5441
5442 rr = gen_rtx_REG (DImode, rr_regno);
5443 }
5444 else
5445 rr = gen_rtx_REG (SImode, pr_reg);
5446
5447 emit_insn (GEN_MOV (rr, ra));
5448 /* Tell flow the register for return isn't dead. */
5449 emit_insn (gen_rtx_USE (VOIDmode, rr));
5450 return;
5451 }
5452
5453 if (TARGET_SH5)
5454 {
5455 int i;
5456 int offset;
5457 int align;
5458
5459 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5460 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5461 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5462
5463 offset = 0;
5464
5465 /* Walk the save area in the same order as sh_expand_epilogue: first
5466 the 32-bit registers that don't need 8-byte alignment, then the
5467 8-byte aligned registers, accumulating OFFSET until we reach the
5468 slot that holds PR. */
5469 for (align = 0; align <= 1; align++)
5470 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5471 if (TEST_HARD_REG_BIT (live_regs_mask, i))
5472 {
5473 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5474
5475 if (mode == SFmode && (i % 2) == 0
5476 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5477 && (TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1))))
5478 {
5479 mode = DFmode;
5480 i++;
5481 }
5482
5483 /* If we're doing the aligned pass and this is not aligned,
5484 or we're doing the unaligned pass and this is aligned,
5485 skip it. */
5486 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
5487 == 0) != align)
5488 continue;
5489
5490 if (i == pr_reg)
5491 goto found;
5492
5493 offset += GET_MODE_SIZE (mode);
5494 }
5495
5496 /* We couldn't find the PR register. */
5497 abort ();
5498
5499 found:
5500 pr_offset = (rounded_frame_size (d) - d_rounding + offset
5501 + SHMEDIA_REGS_STACK_ADJUST ());
5502 }
5503 else
5504 pr_offset = rounded_frame_size (d) - d_rounding;
5505
5506 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
5507 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
5508
5509 tmp = gen_rtx_MEM (Pmode, tmp);
5510 emit_insn (GEN_MOV (tmp, ra));
5511 }
5512
5513 /* Clear variables at function end. */
5514
5515 static void
5516 sh_output_function_epilogue (file, size)
5517 FILE *file ATTRIBUTE_UNUSED;
5518 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5519 {
5520 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
5521 sh_need_epilogue_known = 0;
5522 sp_switch = NULL_RTX;
5523 }
5524
5525 rtx
5526 sh_builtin_saveregs ()
5527 {
5528 /* First unnamed integer register. */
5529 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
5530 /* Number of integer registers we need to save. */
5531 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
5532 /* First unnamed SFmode float reg */
5533 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5534 /* Number of SFmode float regs to save. */
5535 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5536 rtx regbuf, fpregs;
5537 int bufsize, regno;
5538 HOST_WIDE_INT alias_set;
5539
5540 if (TARGET_SH5)
5541 {
5542 if (n_intregs)
5543 {
5544 int pushregs = n_intregs;
5545
5546 while (pushregs < NPARM_REGS (SImode) - 1
5547 && (CALL_COOKIE_INT_REG_GET
5548 (current_function_args_info.call_cookie,
5549 NPARM_REGS (SImode) - pushregs)
5550 == 1))
5551 {
5552 current_function_args_info.call_cookie
5553 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
5554 - pushregs, 1);
5555 pushregs++;
5556 }
5557
5558 if (pushregs == NPARM_REGS (SImode))
5559 current_function_args_info.call_cookie
5560 |= (CALL_COOKIE_INT_REG (0, 1)
5561 | CALL_COOKIE_STACKSEQ (pushregs - 1));
5562 else
5563 current_function_args_info.call_cookie
5564 |= CALL_COOKIE_STACKSEQ (pushregs);
5565
5566 current_function_pretend_args_size += 8 * n_intregs;
5567 }
5568 if (TARGET_SHCOMPACT)
5569 return const0_rtx;
5570 }
5571
5572 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
5573 {
5574 error ("__builtin_saveregs not supported by this subtarget");
5575 return const0_rtx;
5576 }
5577
5578 if (TARGET_SHMEDIA)
5579 n_floatregs = 0;
5580
5581 /* Allocate block of memory for the regs. */
5582 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
5583 Or can assign_stack_local accept a 0 SIZE argument? */
5584 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
5585
5586 if (TARGET_SHMEDIA)
5587 regbuf = gen_rtx_MEM (BLKmode,
5588 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
5589 else if (n_floatregs & 1)
5590 {
5591 rtx addr;
5592
5593 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
5594 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
5595 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
5596 regbuf = change_address (regbuf, BLKmode, addr);
5597 }
5598 else
5599 regbuf = assign_stack_local (BLKmode, bufsize, 0);
5600 alias_set = get_varargs_alias_set ();
5601 set_mem_alias_set (regbuf, alias_set);
5602
5603 /* Save int args.
5604 This is optimized to only save the regs that are necessary. Explicitly
5605 named args need not be saved. */
5606 if (n_intregs > 0)
5607 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
5608 adjust_address (regbuf, BLKmode,
5609 n_floatregs * UNITS_PER_WORD),
5610 n_intregs);
5611
5612 if (TARGET_SHMEDIA)
5613 /* Return the address of the regbuf. */
5614 return XEXP (regbuf, 0);
5615
5616 /* Save float args.
5617 This is optimized to only save the regs that are necessary. Explicitly
5618 named args need not be saved.
5619 We explicitly build a pointer to the buffer because it halves the insn
5620 count when not optimizing (otherwise the pointer is built for each reg
5621 saved).
5622 We emit the moves in reverse order so that we can use predecrement. */
5623
5624 fpregs = gen_reg_rtx (Pmode);
5625 emit_move_insn (fpregs, XEXP (regbuf, 0));
5626 emit_insn (gen_addsi3 (fpregs, fpregs,
5627 GEN_INT (n_floatregs * UNITS_PER_WORD)));
5628 if (TARGET_SH4)
5629 {
5630 rtx mem;
5631 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
5632 {
5633 emit_insn (gen_addsi3 (fpregs, fpregs,
5634 GEN_INT (-2 * UNITS_PER_WORD)));
5635 mem = gen_rtx_MEM (DFmode, fpregs);
5636 set_mem_alias_set (mem, alias_set);
5637 emit_move_insn (mem,
5638 gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno));
5639 }
5640 regno = first_floatreg;
5641 if (regno & 1)
5642 {
5643 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5644 mem = gen_rtx_MEM (SFmode, fpregs);
5645 set_mem_alias_set (mem, alias_set);
5646 emit_move_insn (mem,
5647 gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno
5648 - (TARGET_LITTLE_ENDIAN != 0)));
5649 }
5650 }
5651 else
5652 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
5653 {
5654 rtx mem;
5655
5656 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
5657 mem = gen_rtx_MEM (SFmode, fpregs);
5658 set_mem_alias_set (mem, alias_set);
5659 emit_move_insn (mem,
5660 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
5661 }
5662
5663 /* Return the address of the regbuf. */
5664 return XEXP (regbuf, 0);
5665 }
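/* Layout of the register save buffer allocated above (non-SHmedia,
   illustrative):

	low addresses	unnamed SFmode float args, stored in reverse order
			unnamed integer args, at n_floatregs * UNITS_PER_WORD
	high addresses

   sh_va_start below points __va_next_fp at the start of this buffer and
   __va_next_o at the first integer slot.  */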
5666
5667 /* Define the `__builtin_va_list' type for the ABI. */
5668
5669 tree
5670 sh_build_va_list ()
5671 {
5672 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5673 tree record;
5674
5675 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4) || TARGET_HITACHI)
5676 return ptr_type_node;
5677
5678 record = make_node (RECORD_TYPE);
5679
5680 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
5681 ptr_type_node);
5682 f_next_o_limit = build_decl (FIELD_DECL,
5683 get_identifier ("__va_next_o_limit"),
5684 ptr_type_node);
5685 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
5686 ptr_type_node);
5687 f_next_fp_limit = build_decl (FIELD_DECL,
5688 get_identifier ("__va_next_fp_limit"),
5689 ptr_type_node);
5690 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
5691 ptr_type_node);
5692
5693 DECL_FIELD_CONTEXT (f_next_o) = record;
5694 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
5695 DECL_FIELD_CONTEXT (f_next_fp) = record;
5696 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
5697 DECL_FIELD_CONTEXT (f_next_stack) = record;
5698
5699 TYPE_FIELDS (record) = f_next_o;
5700 TREE_CHAIN (f_next_o) = f_next_o_limit;
5701 TREE_CHAIN (f_next_o_limit) = f_next_fp;
5702 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
5703 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
5704
5705 layout_type (record);
5706
5707 return record;
5708 }
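
/* For reference -- a sketch only, not used by the compiler: on SH2E / SH4
   targets that do not use the Hitachi calling convention, the record built
   above corresponds roughly to

     typedef struct
     {
       void *__va_next_o;          next unnamed argument in the GPR save area
       void *__va_next_o_limit;    end of the GPR save area
       void *__va_next_fp;         next unnamed argument in the FPR save area
       void *__va_next_fp_limit;   end of the FPR save area
       void *__va_next_stack;      next unnamed argument passed on the stack
     } __builtin_va_list;

   sh_va_start below fills in these fields and sh_va_arg walks them.  */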
5709
5710 /* Implement `va_start' for varargs and stdarg. */
5711
5712 void
5713 sh_va_start (valist, nextarg)
5714 tree valist;
5715 rtx nextarg;
5716 {
5717 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5718 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
5719 tree t, u;
5720 int nfp, nint;
5721
5722 if (TARGET_SH5)
5723 {
5724 expand_builtin_saveregs ();
5725 std_expand_builtin_va_start (valist, nextarg);
5726 return;
5727 }
5728
5729 if ((! TARGET_SH2E && ! TARGET_SH4) || TARGET_HITACHI)
5730 {
5731 std_expand_builtin_va_start (valist, nextarg);
5732 return;
5733 }
5734
5735 f_next_o = TYPE_FIELDS (va_list_type_node);
5736 f_next_o_limit = TREE_CHAIN (f_next_o);
5737 f_next_fp = TREE_CHAIN (f_next_o_limit);
5738 f_next_fp_limit = TREE_CHAIN (f_next_fp);
5739 f_next_stack = TREE_CHAIN (f_next_fp_limit);
5740
5741 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
5742 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
5743 valist, f_next_o_limit);
5744 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp);
5745 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
5746 valist, f_next_fp_limit);
5747 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
5748 valist, f_next_stack);
5749
5750 /* Call __builtin_saveregs. */
5751 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
5752 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
5753 TREE_SIDE_EFFECTS (t) = 1;
5754 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5755
5756 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
5757 if (nfp < 8)
5758 nfp = 8 - nfp;
5759 else
5760 nfp = 0;
5761 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5762 build_int_2 (UNITS_PER_WORD * nfp, 0)));
5763 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
5764 TREE_SIDE_EFFECTS (t) = 1;
5765 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5766
5767 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
5768 TREE_SIDE_EFFECTS (t) = 1;
5769 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5770
5771 nint = current_function_args_info.arg_count[SH_ARG_INT];
5772 if (nint < 4)
5773 nint = 4 - nint;
5774 else
5775 nint = 0;
5776 u = fold (build (PLUS_EXPR, ptr_type_node, u,
5777 build_int_2 (UNITS_PER_WORD * nint, 0)));
5778 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
5779 TREE_SIDE_EFFECTS (t) = 1;
5780 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5781
5782 u = make_tree (ptr_type_node, nextarg);
5783 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
5784 TREE_SIDE_EFFECTS (t) = 1;
5785 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5786 }
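
/* An illustrative layout of the areas set up above -- a sketch under the
   assumption of a function with two named float arguments and one named int
   argument, with UNITS_PER_WORD == 4, so nfp == 6 and nint == 3:

     regbuf + 0:   six unnamed FP register slots      <-- next_fp
     regbuf + 24:  three unnamed int register slots   <-- next_fp_limit, next_o
     regbuf + 36:                                     <-- next_o_limit

   next_stack is simply NEXTARG, the first stack-passed argument.  */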
5787
5788 /* Implement `va_arg'. */
5789
5790 rtx
5791 sh_va_arg (valist, type)
5792 tree valist, type;
5793 {
5794 HOST_WIDE_INT size, rsize;
5795 tree tmp, pptr_type_node;
5796 rtx addr_rtx, r;
5797 rtx result;
5798 int pass_by_ref = MUST_PASS_IN_STACK (TYPE_MODE (type), type);
5799
5800 size = int_size_in_bytes (type);
5801 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5802 pptr_type_node = build_pointer_type (ptr_type_node);
5803
5804 if (pass_by_ref)
5805 type = build_pointer_type (type);
5806
5807 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4) && ! TARGET_HITACHI)
5808 {
5809 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
5810 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
5811 int pass_as_float;
5812 rtx lab_false, lab_over;
5813
5814 f_next_o = TYPE_FIELDS (va_list_type_node);
5815 f_next_o_limit = TREE_CHAIN (f_next_o);
5816 f_next_fp = TREE_CHAIN (f_next_o_limit);
5817 f_next_fp_limit = TREE_CHAIN (f_next_fp);
5818 f_next_stack = TREE_CHAIN (f_next_fp_limit);
5819
5820 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
5821 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
5822 valist, f_next_o_limit);
5823 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
5824 valist, f_next_fp);
5825 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
5826 valist, f_next_fp_limit);
5827 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
5828 valist, f_next_stack);
5829
5830 if (TARGET_SH4)
5831 {
5832 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
5833 || (TREE_CODE (type) == COMPLEX_TYPE
5834 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
5835 && size <= 16));
5836 }
5837 else
5838 {
5839 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
5840 }
5841
5842 addr_rtx = gen_reg_rtx (Pmode);
5843 lab_false = gen_label_rtx ();
5844 lab_over = gen_label_rtx ();
5845
5846 if (pass_as_float)
5847 {
5848 int first_floatreg
5849 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5850 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5851
5852 emit_cmp_and_jump_insns (expand_expr (next_fp, NULL_RTX, Pmode,
5853 EXPAND_NORMAL),
5854 expand_expr (next_fp_limit, NULL_RTX,
5855 Pmode, EXPAND_NORMAL),
5856 GE, const1_rtx, Pmode, 1, lab_false);
5857
5858 if (TYPE_ALIGN (type) > BITS_PER_WORD
5859 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
5860 && (n_floatregs & 1)))
5861 {
5862 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp,
5863 build_int_2 (UNITS_PER_WORD, 0));
5864 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
5865 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
5866 TREE_SIDE_EFFECTS (tmp) = 1;
5867 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
5868 }
5869
5870 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
5871 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5872 if (r != addr_rtx)
5873 emit_move_insn (addr_rtx, r);
5874
5875 emit_jump_insn (gen_jump (lab_over));
5876 emit_barrier ();
5877 emit_label (lab_false);
5878
5879 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
5880 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5881 if (r != addr_rtx)
5882 emit_move_insn (addr_rtx, r);
5883 }
5884 else
5885 {
5886 tmp = build (PLUS_EXPR, ptr_type_node, next_o,
5887 build_int_2 (rsize, 0));
5888
5889 emit_cmp_and_jump_insns (expand_expr (tmp, NULL_RTX, Pmode,
5890 EXPAND_NORMAL),
5891 expand_expr (next_o_limit, NULL_RTX,
5892 Pmode, EXPAND_NORMAL),
5893 GT, const1_rtx, Pmode, 1, lab_false);
5894
5895 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
5896 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5897 if (r != addr_rtx)
5898 emit_move_insn (addr_rtx, r);
5899
5900 emit_jump_insn (gen_jump (lab_over));
5901 emit_barrier ();
5902 emit_label (lab_false);
5903
5904 if (size > 4 && ! TARGET_SH4)
5905 {
5906 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
5907 TREE_SIDE_EFFECTS (tmp) = 1;
5908 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
5909 }
5910
5911 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
5912 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
5913 if (r != addr_rtx)
5914 emit_move_insn (addr_rtx, r);
5915 }
5916
5917 emit_label (lab_over);
5918
5919 tmp = make_tree (pptr_type_node, addr_rtx);
5920 valist = build1 (INDIRECT_REF, ptr_type_node, tmp);
5921 }
5922
5923 /* ??? In va-sh.h, there had been code to make values larger than
5924 size 8 indirect. This does not match the FUNCTION_ARG macros. */
5925
5926 result = std_expand_builtin_va_arg (valist, type);
5927 if (pass_by_ref)
5928 {
5929 #ifdef POINTERS_EXTEND_UNSIGNED
5930 if (GET_MODE (result) != Pmode)
5931 result = convert_memory_address (Pmode, result);
5932 #endif
5933 result = gen_rtx_MEM (ptr_mode, force_reg (Pmode, result));
5934 set_mem_alias_set (result, get_varargs_alias_set ());
5935 }
5936 /* ??? expand_builtin_va_arg will also set the alias set of the dereferenced
5937 argument to the varargs alias set. */
5938 return result;
5939 }
5940
5941 /* Define the offset between two registers, one to be eliminated, and
5942 the other its replacement, at the start of a routine. */
5943
5944 int
5945 initial_elimination_offset (from, to)
5946 int from;
5947 int to;
5948 {
5949 int regs_saved;
5950 int regs_saved_rounding = 0;
5951 int total_saved_regs_space;
5952 int total_auto_space;
5953 int save_flags = target_flags;
5954 int copy_flags;
5955
5956 HARD_REG_SET live_regs_mask;
5957 regs_saved = calc_live_regs (&live_regs_mask);
5958 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
5959 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
5960 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5961 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
5962
5963 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
5964 copy_flags = target_flags;
5965 target_flags = save_flags;
5966
5967 total_saved_regs_space = regs_saved + regs_saved_rounding;
5968
5969 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
5970 return total_saved_regs_space + total_auto_space
5971 + current_function_args_info.byref_regs * 8;
5972
5973 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5974 return total_saved_regs_space + total_auto_space
5975 + current_function_args_info.byref_regs * 8;
5976
5977 /* Initial gap between fp and sp is 0. */
5978 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
5979 return 0;
5980
5981 if (from == RETURN_ADDRESS_POINTER_REGNUM
5982 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
5983 {
5984 if (TARGET_SH5)
5985 {
5986 int i, n = total_saved_regs_space;
5987 int align;
5988 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5989
5990 n += total_auto_space;
5991
5992 /* If it wasn't saved, there's not much we can do. */
5993 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
5994 return n;
5995
5996 target_flags = copy_flags;
5997
5998 /* We loop twice: first, check the 8-byte aligned registers,
5999 which are stored at the higher addresses and are known
6000 to be aligned. Then, check the 32-bit registers that don't
6001 need 8-byte alignment. */
6002 for (align = 1; align >= 0; align--)
6003 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
6004 if (TEST_HARD_REG_BIT (live_regs_mask, i))
6005 {
6006 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
6007
6008 if (mode == SFmode && (i % 2) == 1
6009 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
6010 && TEST_HARD_REG_BIT (live_regs_mask, (i ^ 1)))
6011 {
6012 mode = DFmode;
6013 i--;
6014 }
6015
6016 /* If we're doing the aligned pass and this is not aligned,
6017 or we're doing the unaligned pass and this is aligned,
6018 skip it. */
6019 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT)
6020 == 0) != align)
6021 continue;
6022
6023 n -= GET_MODE_SIZE (mode);
6024
6025 if (i == pr_reg)
6026 {
6027 target_flags = save_flags;
6028 return n;
6029 }
6030 }
6031
6032 abort ();
6033 }
6034 else
6035 return total_auto_space;
6036 }
6037
6038 abort ();
6039 }
6040 \f
6041 /* Handle machine-specific pragmas to be semi-compatible with the Hitachi
6042 compiler. */
6043
6044 void
6045 sh_pr_interrupt (pfile)
6046 struct cpp_reader *pfile ATTRIBUTE_UNUSED;
6047 {
6048 pragma_interrupt = 1;
6049 }
6050
6051 void
6052 sh_pr_trapa (pfile)
6053 struct cpp_reader *pfile ATTRIBUTE_UNUSED;
6054 {
6055 pragma_interrupt = pragma_trapa = 1;
6056 }
6057
6058 void
6059 sh_pr_nosave_low_regs (pfile)
6060 struct cpp_reader *pfile ATTRIBUTE_UNUSED;
6061 {
6062 pragma_nosave_low_regs = 1;
6063 }
6064
6065 /* Generate the 'interrupt_handler' attribute for decls. */
6066
6067 static void
6068 sh_insert_attributes (node, attributes)
6069 tree node;
6070 tree * attributes;
6071 {
6072 if (! pragma_interrupt
6073 || TREE_CODE (node) != FUNCTION_DECL)
6074 return;
6075
6076 /* We are only interested in declarations. */
6077 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
6078 return;
6079
6080 /* Add an 'interrupt_handler' attribute. */
6081 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
6082
6083 return;
6084 }
6085
6086 /* Supported attributes:
6087
6088 interrupt_handler -- specifies this function is an interrupt handler.
6089
6090 sp_switch -- specifies an alternate stack for an interrupt handler
6091 to run on.
6092
6093 trap_exit -- use a trapa to exit an interrupt function instead of
6094 an rte instruction. */
6095
6096 const struct attribute_spec sh_attribute_table[] =
6097 {
6098 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
6099 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
6100 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
6101 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
6102 { NULL, 0, 0, false, false, false, NULL }
6103 };
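
/* Illustrative uses of the attributes above (a sketch; the symbol name and
   trap number are made up for illustration, and the GCC manual is the
   authoritative reference for the syntax):

     void isr1 (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler, sp_switch ("alt_stack")));
     void isr3 (void) __attribute__ ((interrupt_handler, trap_exit (4)));

   sp_switch names a symbol holding the alternate stack address, and
   trap_exit gives the trap number used by the trapa at function exit.  */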
6104
6105 /* Handle an "interrupt_handler" attribute; arguments as in
6106 struct attribute_spec.handler. */
6107 static tree
6108 sh_handle_interrupt_handler_attribute (node, name, args, flags, no_add_attrs)
6109 tree *node;
6110 tree name;
6111 tree args ATTRIBUTE_UNUSED;
6112 int flags ATTRIBUTE_UNUSED;
6113 bool *no_add_attrs;
6114 {
6115 if (TREE_CODE (*node) != FUNCTION_DECL)
6116 {
6117 warning ("`%s' attribute only applies to functions",
6118 IDENTIFIER_POINTER (name));
6119 *no_add_attrs = true;
6120 }
6121 else if (TARGET_SHCOMPACT)
6122 {
6123 error ("attribute interrupt_handler is not compatible with -m5-compact");
6124 *no_add_attrs = true;
6125 }
6126
6127 return NULL_TREE;
6128 }
6129
6130 /* Handle an "sp_switch" attribute; arguments as in
6131 struct attribute_spec.handler. */
6132 static tree
6133 sh_handle_sp_switch_attribute (node, name, args, flags, no_add_attrs)
6134 tree *node;
6135 tree name;
6136 tree args;
6137 int flags ATTRIBUTE_UNUSED;
6138 bool *no_add_attrs;
6139 {
6140 if (TREE_CODE (*node) != FUNCTION_DECL)
6141 {
6142 warning ("`%s' attribute only applies to functions",
6143 IDENTIFIER_POINTER (name));
6144 *no_add_attrs = true;
6145 }
6146 else if (!pragma_interrupt)
6147 {
6148 /* The sp_switch attribute only has meaning for interrupt functions. */
6149 warning ("`%s' attribute only applies to interrupt functions",
6150 IDENTIFIER_POINTER (name));
6151 *no_add_attrs = true;
6152 }
6153 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
6154 {
6155 /* The argument must be a constant string. */
6156 warning ("`%s' attribute argument not a string constant",
6157 IDENTIFIER_POINTER (name));
6158 *no_add_attrs = true;
6159 }
6160 else
6161 {
6162 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
6163 TREE_STRING_POINTER (TREE_VALUE (args)));
6164 }
6165
6166 return NULL_TREE;
6167 }
6168
6169 /* Handle an "trap_exit" attribute; arguments as in
6170 struct attribute_spec.handler. */
6171 static tree
6172 sh_handle_trap_exit_attribute (node, name, args, flags, no_add_attrs)
6173 tree *node;
6174 tree name;
6175 tree args;
6176 int flags ATTRIBUTE_UNUSED;
6177 bool *no_add_attrs;
6178 {
6179 if (TREE_CODE (*node) != FUNCTION_DECL)
6180 {
6181 warning ("`%s' attribute only applies to functions",
6182 IDENTIFIER_POINTER (name));
6183 *no_add_attrs = true;
6184 }
6185 else if (!pragma_interrupt)
6186 {
6187 /* The trap_exit attribute only has meaning for interrupt functions. */
6188 warning ("`%s' attribute only applies to interrupt functions",
6189 IDENTIFIER_POINTER (name));
6190 *no_add_attrs = true;
6191 }
6192 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
6193 {
6194 /* The argument must be a constant integer. */
6195 warning ("`%s' attribute argument not an integer constant",
6196 IDENTIFIER_POINTER (name));
6197 *no_add_attrs = true;
6198 }
6199 else
6200 {
6201 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
6202 }
6203
6204 return NULL_TREE;
6205 }
6206
6207 int
6208 sh_cfun_interrupt_handler_p ()
6209 {
6210 return (lookup_attribute ("interrupt_handler",
6211 DECL_ATTRIBUTES (current_function_decl))
6212 != NULL_TREE);
6213 }
6214 \f
6215 /* Predicates used by the templates. */
6216
6217 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
6218 Used only in general_movsrc_operand. */
6219
6220 int
6221 system_reg_operand (op, mode)
6222 rtx op;
6223 enum machine_mode mode ATTRIBUTE_UNUSED;
6224 {
6225 switch (REGNO (op))
6226 {
6227 case PR_REG:
6228 case MACL_REG:
6229 case MACH_REG:
6230 return 1;
6231 }
6232 return 0;
6233 }
6234
6235 /* Returns 1 if OP can be the source of a simple move operation.
6236 Same as general_operand, but a LABEL_REF is valid, while PRE_DEC is
6237 invalid, as are subregs of system registers. */
6238
6239 int
6240 general_movsrc_operand (op, mode)
6241 rtx op;
6242 enum machine_mode mode;
6243 {
6244 if (GET_CODE (op) == MEM)
6245 {
6246 rtx inside = XEXP (op, 0);
6247 if (GET_CODE (inside) == CONST)
6248 inside = XEXP (inside, 0);
6249
6250 if (GET_CODE (inside) == LABEL_REF)
6251 return 1;
6252
6253 if (GET_CODE (inside) == PLUS
6254 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
6255 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
6256 return 1;
6257
6258 /* Only post inc allowed. */
6259 if (GET_CODE (inside) == PRE_DEC)
6260 return 0;
6261 }
6262
6263 if ((mode == QImode || mode == HImode)
6264 && (GET_CODE (op) == SUBREG
6265 && GET_CODE (XEXP (op, 0)) == REG
6266 && system_reg_operand (XEXP (op, 0), mode)))
6267 return 0;
6268
6269 return general_operand (op, mode);
6270 }
6271
6272 /* Returns 1 if OP can be the destination of a move.
6273 Same as general_operand, but no post-increment allowed. */
6274
6275 int
6276 general_movdst_operand (op, mode)
6277 rtx op;
6278 enum machine_mode mode;
6279 {
6280 /* Only pre dec allowed. */
6281 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
6282 return 0;
6283
6284 return general_operand (op, mode);
6285 }
6286
6287 /* Returns 1 if OP is a normal arithmetic register. */
6288
6289 int
6290 arith_reg_operand (op, mode)
6291 rtx op;
6292 enum machine_mode mode;
6293 {
6294 if (register_operand (op, mode))
6295 {
6296 int regno;
6297
6298 if (GET_CODE (op) == REG)
6299 regno = REGNO (op);
6300 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
6301 regno = REGNO (SUBREG_REG (op));
6302 else
6303 return 1;
6304
6305 return (regno != T_REG && regno != PR_REG
6306 && ! TARGET_REGISTER_P (regno)
6307 && (regno != FPUL_REG || TARGET_SH4)
6308 && regno != MACH_REG && regno != MACL_REG);
6309 }
6310 return 0;
6311 }
6312
6313 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
6314 because this would lead to missing sign extensions when truncating from
6315 DImode to SImode. */
6316 int
6317 arith_reg_dest (op, mode)
6318 rtx op;
6319 enum machine_mode mode;
6320 {
6321 if (mode == DImode && GET_CODE (op) == SUBREG
6322 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
6323 return 0;
6324 return arith_reg_operand (op, mode);
6325 }
6326
6327 int
6328 int_gpr_dest (op, mode)
6329 rtx op;
6330 enum machine_mode mode ATTRIBUTE_UNUSED;
6331 {
6332 enum machine_mode op_mode = GET_MODE (op);
6333
6334 if (GET_MODE_CLASS (op_mode) != MODE_INT
6335 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
6336 return 0;
6337 if (! reload_completed)
6338 return 0;
6339 return true_regnum (op) <= LAST_GENERAL_REG;
6340 }
6341
6342 int
6343 fp_arith_reg_operand (op, mode)
6344 rtx op;
6345 enum machine_mode mode;
6346 {
6347 if (register_operand (op, mode))
6348 {
6349 int regno;
6350
6351 if (GET_CODE (op) == REG)
6352 regno = REGNO (op);
6353 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
6354 regno = REGNO (SUBREG_REG (op));
6355 else
6356 return 1;
6357
6358 return (regno >= FIRST_PSEUDO_REGISTER
6359 || FP_REGISTER_P (regno));
6360 }
6361 return 0;
6362 }
6363
6364 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
6365
6366 int
6367 arith_operand (op, mode)
6368 rtx op;
6369 enum machine_mode mode;
6370 {
6371 if (arith_reg_operand (op, mode))
6372 return 1;
6373
6374 if (TARGET_SHMEDIA)
6375 {
6376 /* FIXME: We should be checking whether the CONST_INT fits in a
6377 CONST_OK_FOR_J here, but this causes reload_cse to crash when
6378 attempting to transform a sequence of two 64-bit sets of the
6379 same register from literal constants into a set and an add,
6380 when the difference is too wide for an add. */
6381 if (GET_CODE (op) == CONST_INT
6382 || EXTRA_CONSTRAINT_S (op))
6383 return 1;
6384 else
6385 return 0;
6386 }
6387 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
6388 return 1;
6389
6390 return 0;
6391 }
6392
6393 /* Returns 1 if OP is a valid source operand for a compare insn. */
6394
6395 int
6396 arith_reg_or_0_operand (op, mode)
6397 rtx op;
6398 enum machine_mode mode;
6399 {
6400 if (arith_reg_operand (op, mode))
6401 return 1;
6402
6403 if (EXTRA_CONSTRAINT_U (op))
6404 return 1;
6405
6406 return 0;
6407 }
6408
6409 /* Return 1 if OP is a valid source operand for an SHmedia operation
6410 that takes either a register or a 6-bit immediate. */
6411
6412 int
6413 shmedia_6bit_operand (op, mode)
6414 rtx op;
6415 enum machine_mode mode;
6416 {
6417 return (arith_reg_operand (op, mode)
6418 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_O (INTVAL (op))));
6419 }
6420
6421 /* Returns 1 if OP is a valid source operand for a logical operation. */
6422
6423 int
6424 logical_operand (op, mode)
6425 rtx op;
6426 enum machine_mode mode;
6427 {
6428 if (arith_reg_operand (op, mode))
6429 return 1;
6430
6431 if (TARGET_SHMEDIA)
6432 {
6433 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_P (INTVAL (op)))
6434 return 1;
6435 else
6436 return 0;
6437 }
6438 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
6439 return 1;
6440
6441 return 0;
6442 }
6443
6444 int
6445 and_operand (op, mode)
6446 rtx op;
6447 enum machine_mode mode;
6448 {
6449 if (logical_operand (op, mode))
6450 return 1;
6451
6452 /* Check mshflo.l / mshflhi.l opportunities. */
6453 if (TARGET_SHMEDIA
6454 && mode == DImode
6455 && GET_CODE (op) == CONST_INT
6456 && (INTVAL (op) == (unsigned) 0xffffffff
6457 || INTVAL (op) == (HOST_WIDE_INT) -1 << 32))
6458 return 1;
6459
6460 return 0;
6461 }
6462
6463 /* Nonzero if OP is a floating point value with value 0.0. */
6464
6465 int
6466 fp_zero_operand (op)
6467 rtx op;
6468 {
6469 REAL_VALUE_TYPE r;
6470
6471 if (GET_MODE (op) != SFmode)
6472 return 0;
6473
6474 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
6475 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
6476 }
6477
6478 /* Nonzero if OP is a floating point value with value 1.0. */
6479
6480 int
6481 fp_one_operand (op)
6482 rtx op;
6483 {
6484 REAL_VALUE_TYPE r;
6485
6486 if (GET_MODE (op) != SFmode)
6487 return 0;
6488
6489 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
6490 return REAL_VALUES_EQUAL (r, dconst1);
6491 }
6492
6493 /* For -m4 and -m4-single-only, mode switching is used. If we are
6494 compiling without -mfmovd, movsf_ie isn't taken into account for
6495 mode switching. We could check in machine_dependent_reorg for
6496 cases where we know we are in single precision mode, but there is
6497 no interface to find that out during reload, so we must avoid
6498 choosing an fldi alternative during reload and thus failing to
6499 allocate a scratch register for the constant loading. */
6500 int
6501 fldi_ok ()
6502 {
6503 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
6504 }
6505
6506 int
6507 tertiary_reload_operand (op, mode)
6508 rtx op;
6509 enum machine_mode mode ATTRIBUTE_UNUSED;
6510 {
6511 enum rtx_code code = GET_CODE (op);
6512 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
6513 }
6514
6515 int
6516 fpscr_operand (op, mode)
6517 rtx op;
6518 enum machine_mode mode ATTRIBUTE_UNUSED;
6519 {
6520 return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
6521 && GET_MODE (op) == PSImode);
6522 }
6523
6524 int
6525 fpul_operand (op, mode)
6526 rtx op;
6527 enum machine_mode mode;
6528 {
6529 if (TARGET_SHMEDIA)
6530 return fp_arith_reg_operand (op, mode);
6531
6532 return (GET_CODE (op) == REG
6533 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
6534 && GET_MODE (op) == mode);
6535 }
6536
6537 int
6538 symbol_ref_operand (op, mode)
6539 rtx op;
6540 enum machine_mode mode ATTRIBUTE_UNUSED;
6541 {
6542 return (GET_CODE (op) == SYMBOL_REF);
6543 }
6544
6545 /* Return the TLS type for TLS symbols, 0 otherwise. */
6546 int
6547 tls_symbolic_operand (op, mode)
6548 rtx op;
6549 enum machine_mode mode ATTRIBUTE_UNUSED;
6550 {
6551 if (GET_CODE (op) != SYMBOL_REF)
6552 return 0;
6553 return SYMBOL_REF_TLS_MODEL (op);
6554 }
6555
6556 int
6557 commutative_float_operator (op, mode)
6558 rtx op;
6559 enum machine_mode mode;
6560 {
6561 if (GET_MODE (op) != mode)
6562 return 0;
6563 switch (GET_CODE (op))
6564 {
6565 case PLUS:
6566 case MULT:
6567 return 1;
6568 default:
6569 break;
6570 }
6571 return 0;
6572 }
6573
6574 int
6575 noncommutative_float_operator (op, mode)
6576 rtx op;
6577 enum machine_mode mode;
6578 {
6579 if (GET_MODE (op) != mode)
6580 return 0;
6581 switch (GET_CODE (op))
6582 {
6583 case MINUS:
6584 case DIV:
6585 return 1;
6586 default:
6587 break;
6588 }
6589 return 0;
6590 }
6591
6592 int
6593 unary_float_operator (op, mode)
6594 rtx op;
6595 enum machine_mode mode;
6596 {
6597 if (GET_MODE (op) != mode)
6598 return 0;
6599 switch (GET_CODE (op))
6600 {
6601 case ABS:
6602 case NEG:
6603 case SQRT:
6604 return 1;
6605 default:
6606 break;
6607 }
6608 return 0;
6609 }
6610
6611 int
6612 binary_float_operator (op, mode)
6613 rtx op;
6614 enum machine_mode mode;
6615 {
6616 if (GET_MODE (op) != mode)
6617 return 0;
6618 switch (GET_CODE (op))
6619 {
6620 case PLUS:
6621 case MINUS:
6622 case MULT:
6623 case DIV:
6624 return 1;
6625 default:
6626 break;
6627 }
6628 return 0;
6629 }
6630
6631 int
6632 binary_logical_operator (op, mode)
6633 rtx op;
6634 enum machine_mode mode;
6635 {
6636 if (GET_MODE (op) != mode)
6637 return 0;
6638 switch (GET_CODE (op))
6639 {
6640 case IOR:
6641 case AND:
6642 case XOR:
6643 return 1;
6644 default:
6645 break;
6646 }
6647 return 0;
6648 }
6649
6650 int
6651 equality_comparison_operator (op, mode)
6652 rtx op;
6653 enum machine_mode mode;
6654 {
6655 return ((mode == VOIDmode || GET_MODE (op) == mode)
6656 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
6657 }
6658
6659 int greater_comparison_operator (op, mode)
6660 rtx op;
6661 enum machine_mode mode;
6662 {
6663 if (mode != VOIDmode && GET_MODE (op) != mode)
6664 return 0;
6665 switch (GET_CODE (op))
6666 {
6667 case GT:
6668 case GE:
6669 case GTU:
6670 case GEU:
6671 return 1;
6672 default:
6673 return 0;
6674 }
6675 }
6676
6677 int less_comparison_operator (op, mode)
6678 rtx op;
6679 enum machine_mode mode;
6680 {
6681 if (mode != VOIDmode && GET_MODE (op) != mode)
6682 return 0;
6683 switch (GET_CODE (op))
6684 {
6685 case LT:
6686 case LE:
6687 case LTU:
6688 case LEU:
6689 return 1;
6690 default:
6691 return 0;
6692 }
6693 }
6694
6695 /* Accept pseudos and branch target registers. */
6696 int
6697 target_reg_operand (op, mode)
6698 rtx op;
6699 enum machine_mode mode;
6700 {
6701 if (mode != DImode
6702 || GET_MODE (op) != DImode)
6703 return 0;
6704
6705 if (GET_CODE (op) == SUBREG)
6706 op = XEXP (op, 0);
6707
6708 if (GET_CODE (op) != REG)
6709 return 0;
6710
6711 /* We must protect ourselves from matching pseudos that are virtual
6712 registers, because they will eventually be replaced with hardware
6713 registers that aren't branch-target registers. */
6714 if (REGNO (op) > LAST_VIRTUAL_REGISTER
6715 || TARGET_REGISTER_P (REGNO (op)))
6716 return 1;
6717
6718 return 0;
6719 }
6720
6721 /* Same as target_reg_operand, except that label_refs and symbol_refs
6722 are accepted before reload. */
6723 int
6724 target_operand (op, mode)
6725 rtx op;
6726 enum machine_mode mode;
6727 {
6728 if (mode != DImode)
6729 return 0;
6730
6731 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
6732 && EXTRA_CONSTRAINT_T (op))
6733 return ! reload_completed;
6734
6735 return target_reg_operand (op, mode);
6736 }
6737
6738 int
6739 mextr_bit_offset (op, mode)
6740 rtx op;
6741 enum machine_mode mode ATTRIBUTE_UNUSED;
6742 {
6743 HOST_WIDE_INT i;
6744
6745 if (GET_CODE (op) != CONST_INT)
6746 return 0;
6747 i = INTVAL (op);
6748 return i >= 1*8 && i <= 7*8 && (i & 7) == 0;
6749 }
6750
6751 int
6752 extend_reg_operand (op, mode)
6753 rtx op;
6754 enum machine_mode mode;
6755 {
6756 return (GET_CODE (op) == TRUNCATE
6757 ? arith_operand
6758 : arith_reg_operand) (op, mode);
6759 }
6760
6761 int
6762 trunc_hi_operand (op, mode)
6763 rtx op;
6764 enum machine_mode mode;
6765 {
6766 enum machine_mode op_mode = GET_MODE (op);
6767
6768 if (op_mode != SImode && op_mode != DImode
6769 && op_mode != V4HImode && op_mode != V2SImode)
6770 return 0;
6771 return extend_reg_operand (op, mode);
6772 }
6773
6774 int
6775 extend_reg_or_0_operand (op, mode)
6776 rtx op;
6777 enum machine_mode mode;
6778 {
6779 return (GET_CODE (op) == TRUNCATE
6780 ? arith_operand
6781 : arith_reg_or_0_operand) (op, mode);
6782 }
6783
6784 int
6785 general_extend_operand (op, mode)
6786 rtx op;
6787 enum machine_mode mode;
6788 {
6789 return (GET_CODE (op) == TRUNCATE
6790 ? arith_operand
6791 : nonimmediate_operand) (op, mode);
6792 }
6793
6794 int
6795 inqhi_operand (op, mode)
6796 rtx op;
6797 enum machine_mode mode;
6798 {
6799 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
6800 return 0;
6801 op = XEXP (op, 0);
6802 /* Can't use true_regnum here because copy_cost wants to know about
6803 SECONDARY_INPUT_RELOAD_CLASS. */
6804 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
6805 }
6806
6807 int
6808 sh_rep_vec (v, mode)
6809 rtx v;
6810 enum machine_mode mode;
6811 {
6812 int i;
6813 rtx x, y;
6814
6815 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
6816 || (GET_MODE (v) != mode && mode != VOIDmode))
6817 return 0;
6818 i = XVECLEN (v, 0) - 2;
6819 x = XVECEXP (v, 0, i + 1);
6820 if (GET_MODE_UNIT_SIZE (mode) == 1)
6821 {
6822 y = XVECEXP (v, 0, i);
6823 for (i -= 2 ; i >= 0; i -= 2)
6824 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
6825 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
6826 return 0;
6827 }
6828 else
6829 for (; i >= 0; i--)
6830 if (XVECEXP (v, 0, i) != x)
6831 return 0;
6832 return 1;
6833 }
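
/* Two illustrative cases (a sketch of what the check above accepts):
   for an element size larger than one byte, every element must be the
   same, e.g. (const_vector:V4HI [5 5 5 5]); for byte elements a
   repeating pair is enough, e.g. (const_vector:V8QI [a b a b a b a b]),
   since two byte-sized elements are handled as one unit.  */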
6834
6835 /* Determine if V is a constant vector matching MODE with only one element
6836 that is not a sign extension. Two byte-sized elements count as one. */
6837 int
6838 sh_1el_vec (v, mode)
6839 rtx v;
6840 enum machine_mode mode;
6841 {
6842 int unit_size;
6843 int i, last, least, sign_ix;
6844 rtx sign;
6845
6846 if (GET_CODE (v) != CONST_VECTOR
6847 || (GET_MODE (v) != mode && mode != VOIDmode))
6848 return 0;
6849 /* Determine the indices of the last and of the least significant elements. */
6850 last = XVECLEN (v, 0) - 1;
6851 least = TARGET_LITTLE_ENDIAN ? 0 : last;
6852 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
6853 return 0;
6854 sign_ix = least;
6855 if (GET_MODE_UNIT_SIZE (mode) == 1)
6856 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
6857 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
6858 return 0;
6859 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
6860 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
6861 ? constm1_rtx : const0_rtx);
6862 i = XVECLEN (v, 0) - 1;
6863 do
6864 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
6865 return 0;
6866 while (--i);
6867 return 1;
6868 }
6869
6870 int
6871 sh_const_vec (v, mode)
6872 rtx v;
6873 enum machine_mode mode;
6874 {
6875 int i;
6876
6877 if (GET_CODE (v) != CONST_VECTOR
6878 || (GET_MODE (v) != mode && mode != VOIDmode))
6879 return 0;
6880 i = XVECLEN (v, 0) - 1;
6881 for (; i >= 0; i--)
6882 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
6883 return 0;
6884 return 1;
6885 }
6886 \f
6887 /* Return the destination address of a branch. */
6888
6889 static int
6890 branch_dest (branch)
6891 rtx branch;
6892 {
6893 rtx dest = SET_SRC (PATTERN (branch));
6894 int dest_uid;
6895
6896 if (GET_CODE (dest) == IF_THEN_ELSE)
6897 dest = XEXP (dest, 1);
6898 dest = XEXP (dest, 0);
6899 dest_uid = INSN_UID (dest);
6900 return INSN_ADDRESSES (dest_uid);
6901 }
6902 \f
6903 /* Return nonzero if REG is not used after INSN.
6904 We assume REG is a reload reg, and therefore does
6905 not live past labels. It may live past calls or jumps though. */
6906 int
6907 reg_unused_after (reg, insn)
6908 rtx reg;
6909 rtx insn;
6910 {
6911 enum rtx_code code;
6912 rtx set;
6913
6914 /* If the reg is set by this instruction, then it is safe for our
6915 case. Disregard the case where this is a store to memory, since
6916 we are checking a register used in the store address. */
6917 set = single_set (insn);
6918 if (set && GET_CODE (SET_DEST (set)) != MEM
6919 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6920 return 1;
6921
6922 while ((insn = NEXT_INSN (insn)))
6923 {
6924 code = GET_CODE (insn);
6925
6926 #if 0
6927 /* If this is a label that existed before reload, then the register
6928 is dead here. However, if this is a label added by reorg, then
6929 the register may still be live here. We can't tell the difference,
6930 so we just ignore labels completely. */
6931 if (code == CODE_LABEL)
6932 return 1;
6933 /* else */
6934 #endif
6935
6936 if (code == JUMP_INSN)
6937 return 0;
6938
6939 /* If this is a sequence, we must handle them all at once.
6940 We could have for instance a call that sets the target register,
6941 and an insn in a delay slot that uses the register. In this case,
6942 we must return 0. */
6943 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
6944 {
6945 int i;
6946 int retval = 0;
6947
6948 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
6949 {
6950 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
6951 rtx set = single_set (this_insn);
6952
6953 if (GET_CODE (this_insn) == CALL_INSN)
6954 code = CALL_INSN;
6955 else if (GET_CODE (this_insn) == JUMP_INSN)
6956 {
6957 if (INSN_ANNULLED_BRANCH_P (this_insn))
6958 return 0;
6959 code = JUMP_INSN;
6960 }
6961
6962 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
6963 return 0;
6964 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6965 {
6966 if (GET_CODE (SET_DEST (set)) != MEM)
6967 retval = 1;
6968 else
6969 return 0;
6970 }
6971 if (set == 0
6972 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
6973 return 0;
6974 }
6975 if (retval == 1)
6976 return 1;
6977 else if (code == JUMP_INSN)
6978 return 0;
6979 }
6980 else if (GET_RTX_CLASS (code) == 'i')
6981 {
6982 rtx set = single_set (insn);
6983
6984 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
6985 return 0;
6986 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
6987 return GET_CODE (SET_DEST (set)) != MEM;
6988 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
6989 return 0;
6990 }
6991
6992 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
6993 return 1;
6994 }
6995 return 1;
6996 }
6997 \f
6998 #include "ggc.h"
6999
7000 static GTY(()) rtx fpscr_rtx;
7001 rtx
7002 get_fpscr_rtx ()
7003 {
7004 if (! fpscr_rtx)
7005 {
7006 fpscr_rtx = gen_rtx (REG, PSImode, FPSCR_REG);
7007 REG_USERVAR_P (fpscr_rtx) = 1;
7008 mark_user_reg (fpscr_rtx);
7009 }
7010 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7011 mark_user_reg (fpscr_rtx);
7012 return fpscr_rtx;
7013 }
7014
7015 void
7016 emit_sf_insn (pat)
7017 rtx pat;
7018 {
7019 emit_insn (pat);
7020 }
7021
7022 void
7023 emit_df_insn (pat)
7024 rtx pat;
7025 {
7026 emit_insn (pat);
7027 }
7028
7029 void
7030 expand_sf_unop (fun, operands)
7031 rtx (*fun) PARAMS ((rtx, rtx, rtx));
7032 rtx *operands;
7033 {
7034 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7035 }
7036
7037 void
7038 expand_sf_binop (fun, operands)
7039 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
7040 rtx *operands;
7041 {
7042 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7043 get_fpscr_rtx ()));
7044 }
7045
7046 void
7047 expand_df_unop (fun, operands)
7048 rtx (*fun) PARAMS ((rtx, rtx, rtx));
7049 rtx *operands;
7050 {
7051 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7052 }
7053
7054 void
7055 expand_df_binop (fun, operands)
7056 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
7057 rtx *operands;
7058 {
7059 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7060 get_fpscr_rtx ()));
7061 }
7062 \f
7063 /* ??? gcc does flow analysis strictly after common subexpression
7064 elimination. As a result, common subexpression elimination fails
7065 when there are some intervening statements setting the same register.
7066 If we did nothing about this, this would hurt the precision switching
7067 for SH4 badly. There is some cse after reload, but it is unable to
7068 undo the extra register pressure from the unused instructions, and
7069 it cannot remove auto-increment loads.
7070
7071 A C code example that shows this flow/cse weakness for (at least) SH
7072 and sparc (as of gcc ss-970706) is this:
7073
7074 double
7075 f(double a)
7076 {
7077 double d;
7078 d = 0.1;
7079 a += d;
7080 d = 1.1;
7081 d = 0.1;
7082 a *= d;
7083 return a;
7084 }
7085
7086 So we add another pass before common subexpression elimination, to
7087 remove assignments that are dead due to a following assignment in the
7088 same basic block. */
7089
7090 static void
7091 mark_use (x, reg_set_block)
7092 rtx x, *reg_set_block;
7093 {
7094 enum rtx_code code;
7095
7096 if (! x)
7097 return;
7098 code = GET_CODE (x);
7099 switch (code)
7100 {
7101 case REG:
7102 {
7103 int regno = REGNO (x);
7104 int nregs = (regno < FIRST_PSEUDO_REGISTER
7105 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
7106 : 1);
7107 do
7108 {
7109 reg_set_block[regno + nregs - 1] = 0;
7110 }
7111 while (--nregs);
7112 break;
7113 }
7114 case SET:
7115 {
7116 rtx dest = SET_DEST (x);
7117
7118 if (GET_CODE (dest) == SUBREG)
7119 dest = SUBREG_REG (dest);
7120 if (GET_CODE (dest) != REG)
7121 mark_use (dest, reg_set_block);
7122 mark_use (SET_SRC (x), reg_set_block);
7123 break;
7124 }
7125 case CLOBBER:
7126 break;
7127 default:
7128 {
7129 const char *fmt = GET_RTX_FORMAT (code);
7130 int i, j;
7131 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7132 {
7133 if (fmt[i] == 'e')
7134 mark_use (XEXP (x, i), reg_set_block);
7135 else if (fmt[i] == 'E')
7136 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7137 mark_use (XVECEXP (x, i, j), reg_set_block);
7138 }
7139 break;
7140 }
7141 }
7142 }
7143 \f
7144 static rtx get_free_reg PARAMS ((HARD_REG_SET));
7145
7146 /* This function returns a register to use to load the address from which
7147 the fpscr is loaded. Currently it always returns r1 or r7, but when we are
7148 able to use pseudo registers after combine, or have a better mechanism
7149 for choosing a register, it should be done here. */
7150 /* REGS_LIVE is the liveness information for the point for which we
7151 need this allocation. In some bare-bones exit blocks, r1 is live at the
7152 start. We can even have all of r0..r3 being live:
7153 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
7154 The insn before which the new insns are placed will clobber the register
7155 we return. If a basic block consists only of setting the return value
7156 register to a pseudo and using that register, the return value is not
7157 live before or after this block, yet we'll insert our insns right in
7158 the middle. */
7159
7160 static rtx
7161 get_free_reg (regs_live)
7162 HARD_REG_SET regs_live;
7163 {
7164 if (! TEST_HARD_REG_BIT (regs_live, 1))
7165 return gen_rtx_REG (Pmode, 1);
7166
7167 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
7168 there shouldn't be anything but a jump before the function end. */
7169 if (! TEST_HARD_REG_BIT (regs_live, 7))
7170 return gen_rtx_REG (Pmode, 7);
7171
7172 abort ();
7173 }
7174
7175 /* This function will set the fpscr from memory.
7176 MODE is the mode we are setting it to. */
7177 void
7178 fpscr_set_from_mem (mode, regs_live)
7179 int mode;
7180 HARD_REG_SET regs_live;
7181 {
7182 enum attr_fp_mode fp_mode = mode;
7183 rtx addr_reg = get_free_reg (regs_live);
7184
7185 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
7186 emit_insn (gen_fpu_switch1 (addr_reg));
7187 else
7188 emit_insn (gen_fpu_switch0 (addr_reg));
7189 }
7190
7191 /* Is the given character a logical line separator for the assembler? */
7192 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
7193 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
7194 #endif
7195
7196 int
7197 sh_insn_length_adjustment (insn)
7198 rtx insn;
7199 {
7200 /* Instructions with unfilled delay slots take up an extra two bytes for
7201 the nop in the delay slot. */
7202 if (((GET_CODE (insn) == INSN
7203 && GET_CODE (PATTERN (insn)) != USE
7204 && GET_CODE (PATTERN (insn)) != CLOBBER)
7205 || GET_CODE (insn) == CALL_INSN
7206 || (GET_CODE (insn) == JUMP_INSN
7207 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7208 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
7209 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
7210 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
7211 return 2;
7212
7213 /* SH2e has a bug that prevents the use of annulled branches, so if
7214 the delay slot is not filled, we'll have to put a NOP in it. */
7215 if (sh_cpu == CPU_SH2E
7216 && GET_CODE (insn) == JUMP_INSN
7217 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7218 && GET_CODE (PATTERN (insn)) != ADDR_VEC
7219 && get_attr_type (insn) == TYPE_CBRANCH
7220 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
7221 return 2;
7222
7223 /* sh-dsp parallel processing insns take four bytes instead of two. */
7224
7225 if (GET_CODE (insn) == INSN)
7226 {
7227 int sum = 0;
7228 rtx body = PATTERN (insn);
7229 const char *template;
7230 char c;
7231 int maybe_label = 1;
7232
7233 if (GET_CODE (body) == ASM_INPUT)
7234 template = XSTR (body, 0);
7235 else if (asm_noperands (body) >= 0)
7236 template
7237 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
7238 else
7239 return 0;
7240 do
7241 {
7242 int ppi_adjust = 0;
7243
7244 do
7245 c = *template++;
7246 while (c == ' ' || c == '\t');
7247 /* all sh-dsp parallel-processing insns start with p.
7248 The only non-ppi sh insn starting with p is pref.
7249 The only ppi starting with pr is prnd. */
7250 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
7251 ppi_adjust = 2;
7252 /* The repeat pseudo-insn expands to three insns, a total of
7253 six bytes in size. */
7254 else if ((c == 'r' || c == 'R')
7255 && ! strncasecmp ("epeat", template, 5))
7256 ppi_adjust = 4;
7257 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
7258 {
7259 /* If this is a label, it is obviously not a ppi insn. */
7260 if (c == ':' && maybe_label)
7261 {
7262 ppi_adjust = 0;
7263 break;
7264 }
7265 else if (c == '\'' || c == '"')
7266 maybe_label = 0;
7267 c = *template++;
7268 }
7269 sum += ppi_adjust;
7270 maybe_label = c != ':';
7271 }
7272 while (c);
7273 return sum;
7274 }
7275 return 0;
7276 }
7277 \f
7278 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
7279 isn't protected by a PIC unspec. */
7280 int
7281 nonpic_symbol_mentioned_p (x)
7282 rtx x;
7283 {
7284 register const char *fmt;
7285 register int i;
7286
7287 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
7288 || GET_CODE (x) == PC)
7289 return 1;
7290
7291 /* We don't want to look into the possible MEM location of a
7292 CONST_DOUBLE, since we're not going to use it, in general. */
7293 if (GET_CODE (x) == CONST_DOUBLE)
7294 return 0;
7295
7296 if (GET_CODE (x) == UNSPEC
7297 && (XINT (x, 1) == UNSPEC_PIC
7298 || XINT (x, 1) == UNSPEC_GOT
7299 || XINT (x, 1) == UNSPEC_GOTOFF
7300 || XINT (x, 1) == UNSPEC_GOTPLT
7301 || XINT (x, 1) == UNSPEC_GOTTPOFF
7302 || XINT (x, 1) == UNSPEC_DTPOFF
7303 || XINT (x, 1) == UNSPEC_PLT))
7304 return 0;
7305
7306 fmt = GET_RTX_FORMAT (GET_CODE (x));
7307 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7308 {
7309 if (fmt[i] == 'E')
7310 {
7311 register int j;
7312
7313 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7314 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
7315 return 1;
7316 }
7317 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
7318 return 1;
7319 }
7320
7321 return 0;
7322 }
7323
7324 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
7325 @GOTOFF in `reg'. */
7326 rtx
7327 legitimize_pic_address (orig, mode, reg)
7328 rtx orig;
7329 enum machine_mode mode ATTRIBUTE_UNUSED;
7330 rtx reg;
7331 {
7332 if (tls_symbolic_operand (orig, Pmode))
7333 return orig;
7334
7335 if (GET_CODE (orig) == LABEL_REF
7336 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
7337 {
7338 if (reg == 0)
7339 reg = gen_reg_rtx (Pmode);
7340
7341 emit_insn (gen_symGOTOFF2reg (reg, orig));
7342 return reg;
7343 }
7344 else if (GET_CODE (orig) == SYMBOL_REF)
7345 {
7346 if (reg == 0)
7347 reg = gen_reg_rtx (Pmode);
7348
7349 emit_insn (gen_symGOT2reg (reg, orig));
7350 return reg;
7351 }
7352 return orig;
7353 }
7354
7355 /* Mark the use of a constant in the literal table. If the constant
7356 has multiple labels, make it unique. */
7357 static rtx
7358 mark_constant_pool_use (x)
7359 rtx x;
7360 {
7361 rtx insn, lab, pattern;
7362
7363 if (x == NULL)
7364 return x;
7365
7366 switch (GET_CODE (x))
7367 {
7368 case LABEL_REF:
7369 x = XEXP (x, 0);
7370 case CODE_LABEL:
7371 break;
7372 default:
7373 return x;
7374 }
7375
7376 /* Get the first label in the list of labels for the same constant
7377 and delete the other labels in the list. */
7378 lab = x;
7379 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
7380 {
7381 if (GET_CODE (insn) != CODE_LABEL
7382 || LABEL_REFS (insn) != NEXT_INSN (insn))
7383 break;
7384 lab = insn;
7385 }
7386
7387 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
7388 INSN_DELETED_P (insn) = 1;
7389
7390 /* Mark constants in a window. */
7391 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
7392 {
7393 if (GET_CODE (insn) != INSN)
7394 continue;
7395
7396 pattern = PATTERN (insn);
7397 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
7398 continue;
7399
7400 switch (XINT (pattern, 1))
7401 {
7402 case UNSPECV_CONST2:
7403 case UNSPECV_CONST4:
7404 case UNSPECV_CONST8:
7405 XVECEXP (pattern, 0, 1) = const1_rtx;
7406 break;
7407 case UNSPECV_WINDOW_END:
7408 if (XVECEXP (pattern, 0, 0) == x)
7409 return lab;
7410 break;
7411 case UNSPECV_CONST_END:
7412 return lab;
7413 default:
7414 break;
7415 }
7416 }
7417
7418 return lab;
7419 }
7420 \f
7421 /* Return true if it's possible to redirect BRANCH1 to the destination
7422 of an unconditional jump BRANCH2. We only want to do this if the
7423 resulting branch will have a short displacement. */
7424 int
7425 sh_can_redirect_branch (branch1, branch2)
7426 rtx branch1;
7427 rtx branch2;
7428 {
7429 if (flag_expensive_optimizations && simplejump_p (branch2))
7430 {
7431 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
7432 rtx insn;
7433 int distance;
7434
7435 for (distance = 0, insn = PREV_INSN (branch1);
7436 insn && distance < 256;
7437 insn = PREV_INSN (insn))
7438 {
7439 if (insn == dest)
7440 return 1;
7441 else
7442 distance += get_attr_length (insn);
7443 }
7444 for (distance = 0, insn = NEXT_INSN (branch1);
7445 insn && distance < 256;
7446 insn = NEXT_INSN (insn))
7447 {
7448 if (insn == dest)
7449 return 1;
7450 else
7451 distance += get_attr_length (insn);
7452 }
7453 }
7454 return 0;
7455 }
7456
7457 /* Return nonzero if register old_reg can be renamed to register new_reg. */
7458 int
7459 sh_hard_regno_rename_ok (old_reg, new_reg)
7460 unsigned int old_reg ATTRIBUTE_UNUSED;
7461 unsigned int new_reg;
7462 {
7463
7464 /* Interrupt functions can only use registers that have already been
7465 saved by the prologue, even if they would normally be
7466 call-clobbered. */
7467
7468 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
7469 return 0;
7470
7471 return 1;
7472 }
7473
7474 /* Function to update the integer COST
7475 based on the relationship between INSN that is dependent on
7476 DEP_INSN through the dependence LINK. The default is to make no
7477 adjustment to COST. This can be used for example to specify to
7478 the scheduler that an output- or anti-dependence does not incur
7479 the same cost as a data-dependence. The return value should be
7480 the new value for COST. */
7481 static int
7482 sh_adjust_cost (insn, link, dep_insn, cost)
7483 rtx insn;
7484 rtx link ATTRIBUTE_UNUSED;
7485 rtx dep_insn;
7486 int cost;
7487 {
7488 rtx reg, use_pat;
7489
7490 if (TARGET_SHMEDIA)
7491 {
7492 /* On SHmedia, if the dependence is an anti-dependence or
7493 output-dependence, there is no cost. */
7494 if (REG_NOTE_KIND (link) != 0)
7495 cost = 0;
7496
7497 if (get_attr_is_mac_media (insn)
7498 && get_attr_is_mac_media (dep_insn))
7499 cost = 1;
7500 }
7501 else if (REG_NOTE_KIND (link) == 0)
7502 {
7503 enum attr_type dep_type, type;
7504
7505 if (recog_memoized (insn) < 0
7506 || recog_memoized (dep_insn) < 0)
7507 return cost;
7508
7509 dep_type = get_attr_type (dep_insn);
7510 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
7511 cost--;
7512 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
7513 && (type = get_attr_type (insn)) != TYPE_CALL
7514 && type != TYPE_SFUNC)
7515 cost--;
7516
7517 /* The only input for a call that is timing-critical is the
7518 function's address. */
7519 if (GET_CODE(insn) == CALL_INSN)
7520 {
7521 rtx call = PATTERN (insn);
7522
7523 if (GET_CODE (call) == PARALLEL)
7524 call = XVECEXP (call, 0 ,0);
7525 if (GET_CODE (call) == SET)
7526 call = SET_SRC (call);
7527 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
7528 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
7529 cost = 0;
7530 }
7531 /* Likewise, the most timing-critical input for an sfunc call
7532 is the function address. However, sfuncs typically start
7533 using their arguments pretty quickly.
7534 Assume a four cycle delay before they are needed. */
7535 /* All sfunc calls are parallels with at least four components.
7536 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
7537 else if (GET_CODE (PATTERN (insn)) == PARALLEL
7538 && XVECLEN (PATTERN (insn), 0) >= 4
7539 && (reg = sfunc_uses_reg (insn)))
7540 {
7541 if (! reg_set_p (reg, dep_insn))
7542 cost -= 4;
7543 }
7544 /* When the preceding instruction loads the shift amount of
7545 the following SHAD/SHLD, the latency of the load is increased
7546 by 1 cycle. */
7547 else if (TARGET_SH4
7548 && get_attr_type (insn) == TYPE_DYN_SHIFT
7549 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
7550 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
7551 XEXP (SET_SRC (single_set(insn)),
7552 1)))
7553 cost++;
7554 /* When an LS group instruction with a latency of less than
7555 3 cycles is followed by a double-precision floating-point
7556 instruction, FIPR, or FTRV, the latency of the first
7557 instruction is increased to 3 cycles. */
7558 else if (cost < 3
7559 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
7560 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
7561 cost = 3;
7562 /* The lsw register of a double-precision computation is ready one
7563 cycle earlier. */
7564 else if (reload_completed
7565 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
7566 && (use_pat = single_set (insn))
7567 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
7568 SET_SRC (use_pat)))
7569 cost -= 1;
7570
7571 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
7572 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
7573 cost -= 1;
7574 }
7575 /* An anti-dependence penalty of two applies if the first insn is a double
7576 precision fadd / fsub / fmul. */
7577 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7578 && recog_memoized (dep_insn) >= 0
7579 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
7580 /* A lot of alleged anti-flow dependences are fake,
7581 so check this one is real. */
7582 && flow_dependent_p (dep_insn, insn))
7583 cost = 2;
7584
7585
7586 return cost;
7587 }
7588
7589 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
7590 if DEP_INSN is anti-flow dependent on INSN. */
7591 static int
7592 flow_dependent_p (insn, dep_insn)
7593 rtx insn, dep_insn;
7594 {
7595 rtx tmp = PATTERN (insn);
7596
7597 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
7598 return tmp == NULL_RTX;
7599 }
7600
7601 /* A helper function for flow_dependent_p called through note_stores. */
7602 static void
7603 flow_dependent_p_1 (x, pat, data)
7604 rtx x;
7605 rtx pat ATTRIBUTE_UNUSED;
7606 void *data;
7607 {
7608 rtx * pinsn = (rtx *) data;
7609
7610 if (*pinsn && reg_referenced_p (x, *pinsn))
7611 *pinsn = NULL_RTX;
7612 }
7613
7614 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
7615 'special function' patterns (type sfunc) that clobber pr, but that
7616 do not look like function calls to leaf_function_p. Hence we must
7617 do this extra check. */
7618 int
7619 sh_pr_n_sets ()
7620 {
7621 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
7622 }
7623
7624 /* This function returns nonzero if the DFA-based scheduler interface
7625 is to be used. At present this is supported for the SH4 only. */
7626 static int
7627 sh_use_dfa_interface()
7628 {
7629 if (TARGET_HARD_SH4)
7630 return 1;
7631 else
7632 return 0;
7633 }
7634
7635 /* This function returns "2" to indicate dual issue for the SH4
7636 processor. To be used by the DFA pipeline description. */
7637 static int
7638 sh_issue_rate()
7639 {
7640 if (TARGET_SUPERSCALAR)
7641 return 2;
7642 else
7643 return 1;
7644 }
7645
7646 /* SHmedia requires registers for branches, so we can't generate new
7647 branches past reload. */
7648 static bool
7649 sh_cannot_modify_jumps_p ()
7650 {
7651 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
7652 }
7653
7654 static bool
7655 sh_ms_bitfield_layout_p (record_type)
7656 tree record_type ATTRIBUTE_UNUSED;
7657 {
7658 return TARGET_SH5;
7659 }
7660 \f
7661 /*
7662 On the SH1..SH4, the trampoline looks like
7663 2 0002 D202 mov.l l2,r2
7664 1 0000 D301 mov.l l1,r3
7665 3 0004 422B jmp @r2
7666 4 0006 0009 nop
7667 5 0008 00000000 l1: .long area
7668 6 000c 00000000 l2: .long function
7669
7670 SH5 (compact) uses r1 instead of r3 for the static chain. */
7671
7672
7673 /* Emit RTL insns to initialize the variable parts of a trampoline.
7674 FNADDR is an RTX for the address of the function's pure code.
7675 CXT is an RTX for the static chain value for the function. */
7676
7677 void
7678 sh_initialize_trampoline (tramp, fnaddr, cxt)
7679 rtx tramp, fnaddr, cxt;
7680 {
7681 if (TARGET_SHMEDIA64)
7682 {
7683 rtx tramp_templ;
7684 int fixed_len;
7685
7686 rtx movi1 = GEN_INT (0xcc000010);
7687 rtx shori1 = GEN_INT (0xc8000010);
7688 rtx src, dst;
7689
7690 /* The following trampoline works within a +- 128 KB range for cxt:
7691 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
7692 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
7693 gettr tr1,r1; blink tr0,r63 */
7694 /* Address rounding makes it hard to compute the exact bounds of the
7695 offset for this trampoline, but we have a rather generous offset
7696 range, so frame_offset should do fine as an upper bound. */
7697 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
7698 {
7699 /* ??? could optimize this trampoline initialization
7700 by writing DImode words with two insns each. */
7701 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
7702 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
7703 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
7704 insn = gen_rtx_AND (DImode, insn, mask);
7705 /* Or in ptb/u .,tr1 pattern */
7706 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
7707 insn = force_operand (insn, NULL_RTX);
7708 insn = gen_lowpart (SImode, insn);
7709 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
7710 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
7711 insn = gen_rtx_AND (DImode, insn, mask);
7712 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
7713 insn = gen_lowpart (SImode, insn);
7714 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
7715 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
7716 insn = gen_rtx_AND (DImode, insn, mask);
7717 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7718 insn = gen_lowpart (SImode, insn);
7719 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
7720 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
7721 insn = gen_rtx_AND (DImode, insn, mask);
7722 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7723 insn = gen_lowpart (SImode, insn);
7724 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
7725 insn);
7726 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
7727 insn = gen_rtx_AND (DImode, insn, mask);
7728 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
7729 insn = gen_lowpart (SImode, insn);
7730 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
7731 insn);
7732 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
7733 GEN_INT (0x6bf10600));
7734 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
7735 GEN_INT (0x4415fc10));
7736 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
7737 GEN_INT (0x4401fff0));
7738 emit_insn (gen_ic_invalidate_line (tramp));
7739 return;
7740 }
7741 tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
7742 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
7743
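/* Otherwise, when the ptb/u-based sequence above cannot be used, copy
   the fixed part of the trampoline from the __GCC_nested_trampoline
   template and store FNADDR and CXT in the two pointer-sized slots
   that follow it.  */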
7744 tramp_templ = gen_datalabel_ref (tramp_templ);
7745 dst = gen_rtx_MEM (BLKmode, tramp);
7746 src = gen_rtx_MEM (BLKmode, tramp_templ);
7747 set_mem_align (dst, 256);
7748 set_mem_align (src, 64);
7749 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
7750
7751 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
7752 fnaddr);
7753 emit_move_insn (gen_rtx_MEM (Pmode,
7754 plus_constant (tramp,
7755 fixed_len
7756 + GET_MODE_SIZE (Pmode))),
7757 cxt);
7758 emit_insn (gen_ic_invalidate_line (tramp));
7759 return;
7760 }
7761 else if (TARGET_SHMEDIA)
7762 {
7763 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
7764 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
7765 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
7766 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
7767 /* The opcodes movi 0,r1 (0xcc000010) and shori 0,r1 (0xc8000010)
7768 concatenated, rotated right by 10 bits, with the higher 16 bits of each 32 selected. */
7769 rtx movishori
7770 = force_reg (V2HImode, (simplify_gen_subreg
7771 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
7772 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
7773 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
7774
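/* quad0 will hold the movi/shori pair that loads FNADDR and cxtload the
   pair that loads CXT; quad1 and quad2 combine cxtload with the ptabs
   and blink opcodes (the exact packing depends on endianness).  The
   three quads are stored at offsets 0, 8 and 16 below.  */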
7775 tramp = force_reg (Pmode, tramp);
7776 fnaddr = force_reg (SImode, fnaddr);
7777 cxt = force_reg (SImode, cxt);
7778 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
7779 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
7780 movishori));
7781 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
7782 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
7783 emit_insn (gen_ashldi3_media (quad0, quad0, GEN_INT (2)));
7784 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
7785 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
7786 gen_rtx_SUBREG (V2HImode, cxt, 0),
7787 movishori));
7788 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
7789 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
7790 emit_insn (gen_ashldi3_media (cxtload, cxtload, GEN_INT (2)));
7791 if (TARGET_LITTLE_ENDIAN)
7792 {
7793 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
7794 emit_insn (gen_mextr4 (quad2, cxtload, blink));
7795 }
7796 else
7797 {
7798 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
7799 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
7800 }
7801 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
7802 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
7803 emit_insn (gen_ic_invalidate_line (tramp));
7804 return;
7805 }
7806 else if (TARGET_SHCOMPACT)
7807 {
7808 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
7809 return;
7810 }
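/* SH1..SH4: store the four 32-bit words of the trampoline shown in the
   layout comment above.  The first two words each pack a pair of 16-bit
   opcodes; the constants are halfword-swapped for little endian so that
   the same instruction stream results whichever byte order is in use.  */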
7811 emit_move_insn (gen_rtx_MEM (SImode, tramp),
7812 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
7813 SImode));
7814 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
7815 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
7816 SImode));
7817 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
7818 cxt);
7819 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
7820 fnaddr);
7821 if (TARGET_HARVARD)
7822 {
7823 if (TARGET_USERMODE)
7824 emit_library_call (function_symbol ("__ic_invalidate"),
7825 0, VOIDmode, 1, tramp, SImode);
7826 else
7827 emit_insn (gen_ic_invalidate_line (tramp));
7828 }
7829 }
7830
7831 /* FIXME: This is overly conservative. A SHcompact function that
7832 receives arguments ``by reference'' will have them stored in its
7833 own stack frame, so it must not pass pointers or references to
7834 these arguments to other functions by means of sibling calls. */
7835 static bool
7836 sh_function_ok_for_sibcall (decl, exp)
7837 tree decl;
7838 tree exp ATTRIBUTE_UNUSED;
7839 {
7840 return (decl
7841 && (! TARGET_SHCOMPACT
7842 || current_function_args_info.stack_regs == 0)
7843 && ! sh_cfun_interrupt_handler_p ());
7844 }
7845 \f
7846 /* Machine specific built-in functions. */
7847
7848 struct builtin_description
7849 {
7850 const enum insn_code icode;
7851 const char *const name;
7852 int signature;
7853 };
7854
7855 /* Describe the number and signedness of arguments; arg[0] == result
7856 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
7857 static const char signature_args[][4] =
7858 {
7859 #define SH_BLTIN_V2SI2 0
7860 { 4, 4 },
7861 #define SH_BLTIN_V4HI2 1
7862 { 4, 4 },
7863 #define SH_BLTIN_V2SI3 2
7864 { 4, 4, 4 },
7865 #define SH_BLTIN_V4HI3 3
7866 { 4, 4, 4 },
7867 #define SH_BLTIN_V8QI3 4
7868 { 4, 4, 4 },
7869 #define SH_BLTIN_MAC_HISI 5
7870 { 1, 4, 4, 1 },
7871 #define SH_BLTIN_SH_HI 6
7872 { 4, 4, 1 },
7873 #define SH_BLTIN_SH_SI 7
7874 { 4, 4, 1 },
7875 #define SH_BLTIN_V4HI2V2SI 8
7876 { 4, 4, 4 },
7877 #define SH_BLTIN_V4HI2V8QI 9
7878 { 4, 4, 4 },
7879 #define SH_BLTIN_SISF 10
7880 { 4, 2 },
7881 #define SH_BLTIN_LDUA_L 11
7882 { 2, 8 },
7883 #define SH_BLTIN_LDUA_Q 12
7884 { 1, 8 },
7885 #define SH_BLTIN_STUA_L 13
7886 { 0, 8, 2 },
7887 #define SH_BLTIN_STUA_Q 14
7888 { 0, 8, 1 },
7889 #define SH_BLTIN_UDI 15
7890 { 0, 8, 1 },
7891 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
7892 #define SH_BLTIN_2 16
7893 #define SH_BLTIN_SU 16
7894 { 1, 2 },
7895 #define SH_BLTIN_3 17
7896 #define SH_BLTIN_SUS 17
7897 { 2, 2, 1 },
7898 #define SH_BLTIN_PSSV 18
7899 { 0, 8, 2, 2 },
7900 #define SH_BLTIN_XXUU 19
7901 #define SH_BLTIN_UUUU 19
7902 { 1, 1, 1, 1 },
7903 #define SH_BLTIN_PV 20
7904 { 0, 8 },
7905 };
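/* For example, SH_BLTIN_MAC_HISI == { 1, 4, 4, 1 } describes a builtin
   whose result is unsigned, whose first two arguments have no preferred
   signedness and whose third argument is unsigned, while SH_BLTIN_PV
   == { 0, 8 } describes one that returns void and takes a single
   pointer argument.  */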
7906 /* mcmv: operands considered unsigned. */
7907 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
7908 /* mperm: control value considered unsigned int. */
7909 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
7910 /* mshards_q: returns signed short. */
7911 /* nsb: takes long long arg, returns unsigned char. */
7912 static const struct builtin_description bdesc[] =
7913 {
7914 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
7915 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
7916 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
7917 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
7918 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
7919 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
7920 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
7921 #if 0
7922 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
7923 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
7924 #endif
7925 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
7926 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
7927 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
7928 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
7929 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
7930 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
7931 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
7932 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
7933 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
7934 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
7935 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
7936 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
7937 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
7938 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
7939 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
7940 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
7941 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
7942 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
7943 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
7944 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3 },
7945 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
7946 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
7947 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
7948 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
7949 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
7950 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
7951 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
7952 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
7953 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
7954 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
7955 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
7956 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
7957 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
7958 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
7959 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
7960 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
7961 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
7962 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
7963 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
7964 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
7965 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
7966 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
7967 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
7968 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
7969 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
7970 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
7971 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
7972 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
7973 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
7974 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
7975 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
7976 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
7977 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
7978 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
7979 #if 0
7980 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
7981 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
7982 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
7983 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
7984 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
7985 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
7986 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
7987 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
7988 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
7989 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
7990 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
7991 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
7992 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
7993 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
7994 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
7995 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
7996 #endif
7997 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
7998 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
7999 #if 0
8000 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
8001 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
8002 #endif
8003 };
8004
8005 static void
8006 sh_media_init_builtins ()
8007 {
8008 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
8009 const struct builtin_description *d;
8010
8011 memset (shared, 0, sizeof shared);
8012 for (d = bdesc; d - bdesc < (int) (sizeof bdesc / sizeof bdesc[0]); d++)
8013 {
8014 tree type, arg_type;
8015 int signature = d->signature;
8016 int i;
8017
8018 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
8019 type = shared[signature];
8020 else
8021 {
8022 int has_result = signature_args[signature][0] != 0;
8023
8024 if (signature_args[signature][1] == 8
8025 && (insn_data[d->icode].operand[has_result].mode != Pmode))
8026 continue;
8027 if (! TARGET_FPU_ANY
8028 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
8029 continue;
8030 type = void_list_node;
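/* Walk the signature from the last argument towards the first so that
   tree_cons builds the argument-type list in source order, terminated
   by void_list_node.  The i == 0 slot describes the result; its type
   is left in arg_type for build_function_type below.  */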
8031 for (i = 3; ; i--)
8032 {
8033 int arg = signature_args[signature][i];
8034 int opno = i - 1 + has_result;
8035
8036 if (arg == 8)
8037 arg_type = ptr_type_node;
8038 else if (arg)
8039 arg_type = ((*lang_hooks.types.type_for_mode)
8040 (insn_data[d->icode].operand[opno].mode,
8041 (arg & 1)));
8042 else if (i)
8043 continue;
8044 else
8045 arg_type = void_type_node;
8046 if (i == 0)
8047 break;
8048 type = tree_cons (NULL_TREE, arg_type, type);
8049 }
8050 type = build_function_type (arg_type, type);
8051 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
8052 shared[signature] = type;
8053 }
8054 builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
8055 NULL, NULL_TREE);
8056 }
8057 }
8058
8059 static void
8060 sh_init_builtins ()
8061 {
8062 if (TARGET_SHMEDIA)
8063 sh_media_init_builtins ();
8064 }
8065
8066 /* Expand an expression EXP that calls a built-in function,
8067 with result going to TARGET if that's convenient
8068 (and in mode MODE if that's convenient).
8069 SUBTARGET may be used as the target for computing one of EXP's operands.
8070 IGNORE is nonzero if the value is to be ignored. */
8071
8072 static rtx
8073 sh_expand_builtin (exp, target, subtarget, mode, ignore)
8074 tree exp;
8075 rtx target;
8076 rtx subtarget ATTRIBUTE_UNUSED;
8077 enum machine_mode mode ATTRIBUTE_UNUSED;
8078 int ignore;
8079 {
8080 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8081 tree arglist = TREE_OPERAND (exp, 1);
8082 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8083 const struct builtin_description *d = &bdesc[fcode];
8084 enum insn_code icode = d->icode;
8085 int signature = d->signature;
8086 enum machine_mode tmode = VOIDmode;
8087 int nop = 0, i;
8088 rtx op[4];
8089 rtx pat;
8090
8091 if (signature_args[signature][0])
8092 {
8093 if (ignore)
8094 return 0;
8095
8096 tmode = insn_data[icode].operand[0].mode;
8097 if (! target
8098 || GET_MODE (target) != tmode
8099 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
8100 target = gen_reg_rtx (tmode);
8101 op[nop++] = target;
8102 }
8103 else
8104 target = 0;
8105
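/* Expand up to three argument expressions, converting each one to the
   mode the insn operand expects and copying it into a register if it
   does not already satisfy the operand predicate.  */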
8106 for (i = 1; i <= 3; i++, nop++)
8107 {
8108 tree arg;
8109 enum machine_mode opmode, argmode;
8110
8111 if (! signature_args[signature][i])
8112 break;
8113 arg = TREE_VALUE (arglist);
8114 if (arg == error_mark_node)
8115 return const0_rtx;
8116 arglist = TREE_CHAIN (arglist);
8117 opmode = insn_data[icode].operand[nop].mode;
8118 argmode = TYPE_MODE (TREE_TYPE (arg));
8119 if (argmode != opmode)
8120 arg = build1 (NOP_EXPR,
8121 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
8122 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
8123 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
8124 op[nop] = copy_to_mode_reg (opmode, op[nop]);
8125 }
8126
8127 switch (nop)
8128 {
8129 case 1:
8130 pat = (*insn_data[d->icode].genfun) (op[0]);
8131 break;
8132 case 2:
8133 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
8134 break;
8135 case 3:
8136 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
8137 break;
8138 case 4:
8139 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
8140 break;
8141 default:
8142 abort ();
8143 }
8144 if (! pat)
8145 return 0;
8146 emit_insn (pat);
8147 return target;
8148 }
8149
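/* Expand a V2SF unary operation CODE on OP1 into OP0 by applying the
   corresponding SFmode operation to each of the two elements, selected
   in turn by the constant 0 and constant 1 selector operands.  */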
8150 void
8151 sh_expand_unop_v2sf (code, op0, op1)
8152 enum rtx_code code;
8153 rtx op0, op1;
8154 {
8155 rtx sel0 = const0_rtx;
8156 rtx sel1 = const1_rtx;
8157 rtx (*fn) PARAMS ((rtx, rtx, rtx, rtx, rtx)) = gen_unary_sf_op;
8158 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
8159
8160 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
8161 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
8162 }
8163
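/* Likewise, expand a V2SF binary operation CODE on OP1 and OP2 into
   OP0, handling each element pair separately.  */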
8164 void
8165 sh_expand_binop_v2sf (code, op0, op1, op2)
8166 enum rtx_code code;
8167 rtx op0, op1, op2;
8168 {
8169 rtx sel0 = const0_rtx;
8170 rtx sel1 = const1_rtx;
8171 rtx (*fn) PARAMS ((rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx))
8172 = gen_binary_sf_op;
8173 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
8174
8175 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
8176 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
8177 }
8178
8179 /* Return nonzero if a mode change from FROM to TO is invalid for
8180 registers in class CLASS. */
8181 bool
8182 sh_cannot_change_mode_class (from, to, class)
8183 enum machine_mode from, to;
8184 enum reg_class class;
8185 {
8186 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
8187 {
8188 if (TARGET_LITTLE_ENDIAN)
8189 {
8190 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
8191 return reg_classes_intersect_p (DF_REGS, class);
8192 }
8193 else
8194 {
8195 if (GET_MODE_SIZE (from) < 8)
8196 return reg_classes_intersect_p (DF_HI_REGS, class);
8197 }
8198 }
8199 return 0;
8200 }
8201
8202
8203 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
8204 that label is used. */
8205
8206 void
8207 sh_mark_label (address, nuses)
8208 rtx address;
8209 int nuses;
8210 {
8211 if (GOTOFF_P (address))
8212 {
8213 /* Extract the label or symbol. */
8214 address = XEXP (address, 0);
8215 if (GET_CODE (address) == PLUS)
8216 address = XEXP (address, 0);
8217 address = XVECEXP (address, 0, 0);
8218 }
8219 if (GET_CODE (address) == LABEL_REF
8220 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
8221 LABEL_NUSES (XEXP (address, 0)) += nuses;
8222 }
8223
8224 /* Compute extra cost of moving data between one register class
8225 and another. */
8226
8227 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
8228 uses this information. Hence, the general register <-> floating point
8229 register information here is not used for SFmode. */
8230
8231 int
8232 sh_register_move_cost (mode, srcclass, dstclass)
8233 enum machine_mode mode;
8234 enum reg_class srcclass, dstclass;
8235 {
8236 if (dstclass == T_REGS || dstclass == PR_REGS)
8237 return 10;
8238
8239 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
8240 && REGCLASS_HAS_FP_REG (srcclass)
8241 && REGCLASS_HAS_FP_REG (dstclass))
8242 return 4;
8243
8244 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
8245 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
8246 return 9;
8247
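/* Moves between general and floating-point registers are charged per
   8-byte chunk: 4 on SHmedia, 8 when fmovd is available, 12 otherwise.  */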
8248 if ((REGCLASS_HAS_FP_REG (dstclass)
8249 && REGCLASS_HAS_GENERAL_REG (srcclass))
8250 || (REGCLASS_HAS_GENERAL_REG (dstclass)
8251 && REGCLASS_HAS_FP_REG (srcclass)))
8252 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
8253 * ((GET_MODE_SIZE (mode) + 7) / 8U));
8254
8255 if ((dstclass == FPUL_REGS
8256 && REGCLASS_HAS_GENERAL_REG (srcclass))
8257 || (srcclass == FPUL_REGS
8258 && REGCLASS_HAS_GENERAL_REG (dstclass)))
8259 return 5;
8260
8261 if ((dstclass == FPUL_REGS
8262 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
8263 || (srcclass == FPUL_REGS
8264 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
8265 return 7;
8266
8267 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
8268 || (dstclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
8269 return 20;
8270
8271 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
8272 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
8273 return 4;
8274
8275 if (TARGET_SHMEDIA
8276 || (TARGET_FMOVD
8277 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
8278 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
8279 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
8280
8281 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
8282 }
8283
8284 /* Like register_operand, but take into account that SHMEDIA can use
8285 the constant zero like a general register. */
8286 int
8287 sh_register_operand (op, mode)
8288 rtx op;
8289 enum machine_mode mode;
8290 {
8291 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
8292 return 1;
8293 return register_operand (op, mode);
8294 }
8295
8296 static rtx emit_load_ptr PARAMS ((rtx, rtx));
8297
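/* Load the ptr_mode value stored at ADDR into REG, sign-extending it
   to Pmode when pointers are narrower than Pmode.  */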
8298 static rtx
8299 emit_load_ptr (reg, addr)
8300 rtx reg, addr;
8301 {
8302 rtx mem = gen_rtx_MEM (ptr_mode, addr);
8303
8304 if (Pmode != ptr_mode)
8305 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
8306 return emit_move_insn (reg, mem);
8307 }
8308
8309 void
8310 sh_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function)
8311 FILE *file;
8312 tree thunk_fndecl ATTRIBUTE_UNUSED;
8313 HOST_WIDE_INT delta;
8314 HOST_WIDE_INT vcall_offset;
8315 tree function;
8316 {
8317 CUMULATIVE_ARGS cum;
8318 int structure_value_byref = 0;
8319 rtx this, this_value, sibcall, insns, funexp;
8320 tree funtype = TREE_TYPE (function);
8321 int simple_add
8322 = (TARGET_SHMEDIA ? CONST_OK_FOR_J (delta) : CONST_OK_FOR_I (delta));
8323 int did_load = 0;
8324 rtx scratch0, scratch1, scratch2;
8325
8326 reload_completed = 1;
8327 no_new_pseudos = 1;
8328 current_function_uses_only_leaf_regs = 1;
8329
8330 emit_note (NULL, NOTE_INSN_PROLOGUE_END);
8331
8332 /* Find the "this" pointer. We have such a wide range of ABIs for the
8333 SH that it's best to do this completely machine independently.
8334 "this" is passed as first argument, unless a structure return pointer
8335 comes first, in which case "this" comes second. */
8336 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0);
8337 #ifndef PCC_STATIC_STRUCT_RETURN
8338 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
8339 structure_value_byref = 1;
8340 #endif /* not PCC_STATIC_STRUCT_RETURN */
8341 if (structure_value_byref && struct_value_rtx == 0)
8342 {
8343 tree ptype = build_pointer_type (TREE_TYPE (funtype));
8344
8345 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
8346 }
8347 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
8348
8349 /* For SHcompact, we only have r0 for a scratch register: r1 is the
8350 static chain pointer (even if you can't have nested virtual functions
8351 right now, someone might implement them sometime), and the rest of the
8352 registers are used for argument passing, are callee-saved, or reserved. */
8353 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
8354 if (! TARGET_SH5)
8355 {
8356 scratch1 = gen_rtx_REG (ptr_mode, 1);
8357 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
8358 to the location where struct values are returned. */
8359 scratch2 = gen_rtx_REG (Pmode, 3);
8360 }
8361 else if (TARGET_SHMEDIA)
8362 {
8363 scratch1 = gen_rtx_REG (ptr_mode, 21);
8364 scratch2 = gen_rtx_REG (Pmode, TR0_REG);
8365 }
8366
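/* Adjust the this pointer: add DELTA, and, if VCALL_OFFSET is nonzero,
   also add the value loaded from *(*(this + delta) + vcall_offset).
   Where possible, the load of *(this + delta) is issued first so that
   it can overlap with the delta addition.  */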
8367 this_value = plus_constant (this, delta);
8368 if (vcall_offset
8369 && (simple_add || scratch0 != scratch1)
8370 && strict_memory_address_p (ptr_mode, this_value))
8371 {
8372 emit_load_ptr (scratch0, this_value);
8373 did_load = 1;
8374 }
8375
8376 if (!delta)
8377 ; /* Do nothing. */
8378 else if (simple_add)
8379 emit_move_insn (this, this_value);
8380 else
8381 {
8382 emit_move_insn (scratch1, GEN_INT (delta));
8383 emit_insn (gen_add2_insn (this, scratch1));
8384 }
8385
8386 if (vcall_offset)
8387 {
8388 rtx offset_addr;
8389
8390 if (!did_load)
8391 emit_load_ptr (scratch0, this);
8392
8393 offset_addr = plus_constant (scratch0, vcall_offset);
8394 if (strict_memory_address_p (ptr_mode, offset_addr))
8395 ; /* Do nothing. */
8396 else if (! TARGET_SH5)
8397 {
8398 /* scratch0 != scratch1, and we have indexed loads. Get better
8399 schedule by loading the offset into r1 and using an indexed
8400 load - then the load of r1 can issue before the load from
8401 (this + delta) finishes. */
8402 emit_move_insn (scratch1, GEN_INT (vcall_offset));
8403 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
8404 }
8405 else if (TARGET_SHMEDIA
8406 ? CONST_OK_FOR_J (vcall_offset)
8407 : CONST_OK_FOR_I (vcall_offset))
8408 {
8409 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
8410 offset_addr = scratch0;
8411 }
8412 else if (scratch0 != scratch1)
8413 {
8414 emit_move_insn (scratch1, GEN_INT (vcall_offset));
8415 emit_insn (gen_add2_insn (scratch0, scratch1));
8416 offset_addr = scratch0;
8417 }
8418 else
8419 abort (); /* FIXME */
8420 emit_load_ptr (scratch0, offset_addr);
8421
8422 if (Pmode != ptr_mode)
8423 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
8424 emit_insn (gen_add2_insn (this, scratch0));
8425 }
8426
8427 /* Generate a tail call to the target function. */
8428 if (! TREE_USED (function))
8429 {
8430 assemble_external (function);
8431 TREE_USED (function) = 1;
8432 }
8433 funexp = XEXP (DECL_RTL (function), 0);
8434 emit_move_insn (scratch2, funexp);
8435 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
8436 sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
8437 SIBLING_CALL_P (sibcall) = 1;
8438 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
8439 emit_barrier ();
8440
8441 /* Run just enough of rest_of_compilation to do scheduling and get
8442 the insns emitted. Note that use_thunk calls
8443 assemble_start_function and assemble_end_function. */
8444
8445 insn_locators_initialize ();
8446 insns = get_insns ();
8447
8448 if (optimize > 0 && flag_schedule_insns_after_reload)
8449 {
8450
8451 find_basic_blocks (insns, max_reg_num (), rtl_dump_file);
8452 life_analysis (insns, rtl_dump_file, PROP_FINAL);
8453
8454 split_all_insns (1);
8455
8456 schedule_insns (rtl_dump_file);
8457 }
8458
8459 sh_reorg ();
8460
8461 if (optimize > 0 && flag_delayed_branch)
8462 dbr_schedule (insns, rtl_dump_file);
8463 shorten_branches (insns);
8464 final_start_function (insns, file, 1);
8465 final (insns, file, 1, 0);
8466 final_end_function ();
8467
8468 if (optimize > 0 && flag_schedule_insns_after_reload)
8469 {
8470 /* Release all memory allocated by flow. */
8471 free_basic_block_vars (0);
8472
8473 /* Release all memory held by regsets now. */
8474 regset_release_memory ();
8475 }
8476
8477 reload_completed = 0;
8478 no_new_pseudos = 0;
8479 }
8480
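/* Return a SYMBOL_REF for NAME with SYMBOL_FLAG_FUNCTION set.  */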
8481 rtx
8482 function_symbol (const char *name)
8483 {
8484 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
8485 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
8486 return sym;
8487 }
8488
8489 #include "gt-sh.h"