1 /* Output routines for GCC for Hitachi Super-H.
2    Copyright (C) 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001
3 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
6
7 This file is part of GNU CC.
8
9 GNU CC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
13
14 GNU CC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GNU CC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "insn-config.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "flags.h"
30 #include "expr.h"
31 #include "optabs.h"
32 #include "function.h"
33 #include "regs.h"
34 #include "hard-reg-set.h"
35 #include "output.h"
36 #include "insn-attr.h"
37 #include "toplev.h"
38 #include "recog.h"
39 #include "c-pragma.h"
40 #include "integrate.h"
41 #include "tm_p.h"
42 #include "target.h"
43 #include "target-def.h"
44
45 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
46
47 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
48 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
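/* MSW and LSW are word offsets within a two-word value: on a little-endian
   target the lower-numbered register of a pair holds the least significant
   word (LSW == 0), while on a big-endian target it holds the most
   significant word (MSW == 0).  */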
49
50 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
51 int current_function_interrupt;
52
53 /* ??? The pragma interrupt support will not work for SH3. */
54 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
55 output code for the next function appropriate for an interrupt handler. */
56 int pragma_interrupt;
57
58 /* This is set by the trap_exit attribute for functions. It specifies
59 a trap number to be used in a trapa instruction at function exit
60 (instead of an rte instruction). */
61 int trap_exit;
62
63 /* This is used by the sp_switch attribute for functions. It specifies
64 a variable holding the address of the stack the interrupt function
65 should switch to/from at entry/exit. */
66 rtx sp_switch;
67
68 /* This is set by #pragma trapa, and is similar to the above, except that
69 the compiler doesn't emit code to preserve all registers. */
70 static int pragma_trapa;
71
72 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
73 which has a separate set of low regs for User and Supervisor modes.
74 This should only be used for the lowest level of interrupts. Higher levels
75 of interrupts must save the registers in case they themselves are
76 interrupted. */
77 int pragma_nosave_low_regs;
78
79 /* This is used for communication between SETUP_INCOMING_VARARGS and
80 sh_expand_prologue. */
81 int current_function_anonymous_args;
82
83 /* Global variables for machine-dependent things. */
84
85 /* Which cpu are we scheduling for. */
86 enum processor_type sh_cpu;
87
88 /* Saved operands from the last compare to use when we generate an scc
89 or bcc insn. */
90
91 rtx sh_compare_op0;
92 rtx sh_compare_op1;
93
94 /* Provides the class number of the smallest class containing
95    each reg number.  */
96
97 int regno_reg_class[FIRST_PSEUDO_REGISTER] =
98 {
99 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
100 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
101 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
102 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
103 GENERAL_REGS, PR_REGS, T_REGS, NO_REGS,
104 MAC_REGS, MAC_REGS, FPUL_REGS, GENERAL_REGS,
105   FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
106 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
107 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
108 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
109 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
110 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
111 FPSCR_REGS,
112 };
113
114 char fp_reg_names[][5] =
115 {
116 "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7",
117 "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15",
118 "fpul",
119 "xd0","xd2","xd4", "xd6", "xd8", "xd10", "xd12", "xd14",
120 };
121
122 /* Provide reg_class from a letter such as appears in the machine
123 description. */
124
125 const enum reg_class reg_class_from_letter[] =
126 {
127 /* a */ ALL_REGS, /* b */ NO_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
128 /* e */ NO_REGS, /* f */ FP_REGS, /* g */ NO_REGS, /* h */ NO_REGS,
129 /* i */ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
130 /* m */ NO_REGS, /* n */ NO_REGS, /* o */ NO_REGS, /* p */ NO_REGS,
131 /* q */ NO_REGS, /* r */ NO_REGS, /* s */ NO_REGS, /* t */ T_REGS,
132 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
133 /* y */ FPUL_REGS, /* z */ R0_REGS
134 };
135
136 int assembler_dialect;
137
138 static void split_branches PARAMS ((rtx));
139 static int branch_dest PARAMS ((rtx));
140 static void force_into PARAMS ((rtx, rtx));
141 static void print_slot PARAMS ((rtx));
142 static rtx add_constant PARAMS ((rtx, enum machine_mode, rtx));
143 static void dump_table PARAMS ((rtx));
144 static int hi_const PARAMS ((rtx));
145 static int broken_move PARAMS ((rtx));
146 static int mova_p PARAMS ((rtx));
147 static rtx find_barrier PARAMS ((int, rtx, rtx));
148 static int noncall_uses_reg PARAMS ((rtx, rtx, rtx *));
149 static rtx gen_block_redirect PARAMS ((rtx, int, int));
150 static void output_stack_adjust PARAMS ((int, rtx, int));
151 static void push PARAMS ((int));
152 static void pop PARAMS ((int));
153 static void push_regs PARAMS ((int, int));
154 static int calc_live_regs PARAMS ((int *, int *));
155 static void mark_use PARAMS ((rtx, rtx *));
156 static HOST_WIDE_INT rounded_frame_size PARAMS ((int));
157 static rtx mark_constant_pool_use PARAMS ((rtx));
158 const struct attribute_spec sh_attribute_table[];
159 static tree sh_handle_interrupt_handler_attribute PARAMS ((tree *, tree, tree, int, bool *));
160 static tree sh_handle_sp_switch_attribute PARAMS ((tree *, tree, tree, int, bool *));
161 static tree sh_handle_trap_exit_attribute PARAMS ((tree *, tree, tree, int, bool *));
162 static void sh_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
163 static void sh_insert_attributes PARAMS ((tree, tree *));
164 #ifndef OBJECT_FORMAT_ELF
165 static void sh_asm_named_section PARAMS ((const char *, unsigned int));
166 #endif
167 static int sh_adjust_cost PARAMS ((rtx, rtx, rtx, int));
168 \f
169 /* Initialize the GCC target structure. */
170 #undef TARGET_ATTRIBUTE_TABLE
171 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
172
173 /* The next two are used for debug info when compiling with -gdwarf. */
174 #undef TARGET_ASM_UNALIGNED_HI_OP
175 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
176 #undef TARGET_ASM_UNALIGNED_SI_OP
177 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
178
179 #undef TARGET_ASM_FUNCTION_EPILOGUE
180 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
181
182 #undef TARGET_INSERT_ATTRIBUTES
183 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
184
185 #undef TARGET_SCHED_ADJUST_COST
186 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
187
188 struct gcc_target targetm = TARGET_INITIALIZER;
189 \f
190 /* Print the operand address in x to the stream. */
191
192 void
193 print_operand_address (stream, x)
194 FILE *stream;
195 rtx x;
196 {
197 switch (GET_CODE (x))
198 {
199 case REG:
200 case SUBREG:
201 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
202 break;
203
204 case PLUS:
205 {
206 rtx base = XEXP (x, 0);
207 rtx index = XEXP (x, 1);
208
209 switch (GET_CODE (index))
210 {
211 case CONST_INT:
212 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
213 reg_names[true_regnum (base)]);
214 break;
215
216 case REG:
217 case SUBREG:
218 {
219 int base_num = true_regnum (base);
220 int index_num = true_regnum (index);
221
222 fprintf (stream, "@(r0,%s)",
223 reg_names[MAX (base_num, index_num)]);
224 break;
225 }
226
227 default:
228 debug_rtx (x);
229 abort ();
230 }
231 }
232 break;
233
234 case PRE_DEC:
235 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
236 break;
237
238 case POST_INC:
239 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
240 break;
241
242 default:
243 x = mark_constant_pool_use (x);
244 output_addr_const (stream, x);
245 break;
246 }
247 }
248
249 /* Print operand x (an rtx) in assembler syntax to file stream
250 according to modifier code.
251
252 '.' print a .s if insn needs delay slot
253 ',' print LOCAL_LABEL_PREFIX
254 '@' print trap, rte or rts depending upon pragma interruptness
255 '#' output a nop if there is nothing to put in the delay slot
256 'O' print a constant without the #
257 'R' print the LSW of a dp value - changes if in little endian
258 'S' print the MSW of a dp value - changes if in little endian
259 'T' print the next word of a dp value - same as 'R' in big endian mode.
260 'o' output an operator. */
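/* For example, on a little-endian target a DImode value held in r4/r5
   prints as r4 for '%R' (the least significant word) and r5 for '%S'
   (the most significant word); on a big-endian target the two swap.  */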
261
262 void
263 print_operand (stream, x, code)
264 FILE *stream;
265 rtx x;
266 int code;
267 {
268 switch (code)
269 {
270 case '.':
271 if (final_sequence
272 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
273 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
274 break;
275 case ',':
276 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
277 break;
278 case '@':
279 {
280 int interrupt_handler;
281
282 if ((lookup_attribute
283 ("interrupt_handler",
284 DECL_ATTRIBUTES (current_function_decl)))
285 != NULL_TREE)
286 interrupt_handler = 1;
287 else
288 interrupt_handler = 0;
289
290 if (trap_exit)
291 fprintf (stream, "trapa #%d", trap_exit);
292 else if (interrupt_handler)
293 fprintf (stream, "rte");
294 else
295 fprintf (stream, "rts");
296 break;
297 }
298 case '#':
299 /* Output a nop if there's nothing in the delay slot. */
300 if (dbr_sequence_length () == 0)
301 fprintf (stream, "\n\tnop");
302 break;
303 case 'O':
304 x = mark_constant_pool_use (x);
305 output_addr_const (stream, x);
306 break;
307 case 'R':
308 fputs (reg_names[REGNO (x) + LSW], (stream));
309 break;
310 case 'S':
311 fputs (reg_names[REGNO (x) + MSW], (stream));
312 break;
313 case 'T':
314 /* Next word of a double. */
315 switch (GET_CODE (x))
316 {
317 case REG:
318 fputs (reg_names[REGNO (x) + 1], (stream));
319 break;
320 case MEM:
321 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
322 && GET_CODE (XEXP (x, 0)) != POST_INC)
323 x = adjust_address (x, SImode, 4);
324 print_operand_address (stream, XEXP (x, 0));
325 break;
326 default:
327 break;
328 }
329 break;
330 case 'o':
331 switch (GET_CODE (x))
332 {
333 case PLUS: fputs ("add", stream); break;
334 case MINUS: fputs ("sub", stream); break;
335 case MULT: fputs ("mul", stream); break;
336 case DIV: fputs ("div", stream); break;
337 default:
338 break;
339 }
340 break;
341 default:
342 switch (GET_CODE (x))
343 {
344 case REG:
345 if (FP_REGISTER_P (REGNO (x))
346 && GET_MODE_SIZE (GET_MODE (x)) > 4)
347 fprintf ((stream), "d%s", reg_names[REGNO (x)]+1);
348 else
349 fputs (reg_names[REGNO (x)], (stream));
350 break;
351 case MEM:
352 output_address (XEXP (x, 0));
353 break;
354 default:
355 fputc ('#', stream);
356 output_addr_const (stream, x);
357 break;
358 }
359 break;
360 }
361 }
362 \f
363 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
364 static void
365 force_into (value, target)
366 rtx value, target;
367 {
368 value = force_operand (value, target);
369 if (! rtx_equal_p (value, target))
370 emit_insn (gen_move_insn (target, value));
371 }
372
373 /* Emit code to perform a block move. Choose the best method.
374
375 OPERANDS[0] is the destination.
376 OPERANDS[1] is the source.
377 OPERANDS[2] is the size.
378 OPERANDS[3] is the alignment safe to use. */
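/* For example, a constant-size, 4-byte-aligned 12 byte copy on SH4
   (TARGET_HARD_SH4) becomes a call to the __movstrSI12_i4 library routine,
   with the destination address forced into r4 and the source address into
   r5, as set up below.  */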
379
380 int
381 expand_block_move (operands)
382 rtx *operands;
383 {
384 int align = INTVAL (operands[3]);
385 int constp = (GET_CODE (operands[2]) == CONST_INT);
386 int bytes = (constp ? INTVAL (operands[2]) : 0);
387
388 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
389 alignment, or if it isn't a multiple of 4 bytes, then fail. */
390 if (! constp || align < 4 || (bytes % 4 != 0))
391 return 0;
392
393 if (TARGET_HARD_SH4)
394 {
395 if (bytes < 12)
396 return 0;
397 else if (bytes == 12)
398 {
399 tree entry_name;
400 rtx sym;
401 rtx func_addr_rtx;
402 rtx r4 = gen_rtx (REG, SImode, 4);
403 rtx r5 = gen_rtx (REG, SImode, 5);
404
405 entry_name = get_identifier ("__movstrSI12_i4");
406
407 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
408 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
409 force_into (XEXP (operands[0], 0), r4);
410 force_into (XEXP (operands[1], 0), r5);
411 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
412 return 1;
413 }
414 else if (! TARGET_SMALLCODE)
415 {
416 tree entry_name;
417 rtx sym;
418 rtx func_addr_rtx;
419 int dwords;
420 rtx r4 = gen_rtx (REG, SImode, 4);
421 rtx r5 = gen_rtx (REG, SImode, 5);
422 rtx r6 = gen_rtx (REG, SImode, 6);
423
424 entry_name = get_identifier (bytes & 4
425 ? "__movstr_i4_odd"
426 : "__movstr_i4_even");
427 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
428 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
429 force_into (XEXP (operands[0], 0), r4);
430 force_into (XEXP (operands[1], 0), r5);
431
432 dwords = bytes >> 3;
433 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
434 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
435 return 1;
436 }
437 else
438 return 0;
439 }
440 if (bytes < 64)
441 {
442 char entry[30];
443 tree entry_name;
444 rtx sym;
445 rtx func_addr_rtx;
446 rtx r4 = gen_rtx_REG (SImode, 4);
447 rtx r5 = gen_rtx_REG (SImode, 5);
448
449 sprintf (entry, "__movstrSI%d", bytes);
450 entry_name = get_identifier (entry);
451 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
452 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
453 force_into (XEXP (operands[0], 0), r4);
454 force_into (XEXP (operands[1], 0), r5);
455 emit_insn (gen_block_move_real (func_addr_rtx));
456 return 1;
457 }
458
459   /* This is the same number of bytes as a memcpy call, but to a different,
460 less common function name, so this will occasionally use more space. */
461 if (! TARGET_SMALLCODE)
462 {
463 tree entry_name;
464 rtx sym;
465 rtx func_addr_rtx;
466 int final_switch, while_loop;
467 rtx r4 = gen_rtx_REG (SImode, 4);
468 rtx r5 = gen_rtx_REG (SImode, 5);
469 rtx r6 = gen_rtx_REG (SImode, 6);
470
471 entry_name = get_identifier ("__movstr");
472 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
473 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
474 force_into (XEXP (operands[0], 0), r4);
475 force_into (XEXP (operands[1], 0), r5);
476
477 /* r6 controls the size of the move. 16 is decremented from it
478 for each 64 bytes moved. Then the negative bit left over is used
479 as an index into a list of move instructions. e.g., a 72 byte move
480 would be set up with size(r6) = 14, for one iteration through the
481 big while loop, and a switch of -2 for the last part. */
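      /* Working a second case: a 136 byte move has bytes / 4 == 34, so
	 final_switch = 16 - (34 % 16) = 14 and while_loop
	 = (34 / 16 - 1) * 16 = 16, giving r6 = 30: two trips through the
	 64 byte loop and a final switch of -2 for the remaining 8 bytes.  */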
482
483 final_switch = 16 - ((bytes / 4) % 16);
484 while_loop = ((bytes / 4) / 16 - 1) * 16;
485 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
486 emit_insn (gen_block_lump_real (func_addr_rtx));
487 return 1;
488 }
489
490 return 0;
491 }
492
493 /* Prepare operands for a move define_expand; specifically, one of the
494 operands must be in a register. */
495
496 int
497 prepare_move_operands (operands, mode)
498 rtx operands[];
499 enum machine_mode mode;
500 {
501 if (mode == SImode && flag_pic)
502 {
503 rtx temp;
504 if (SYMBOLIC_CONST_P (operands[1]))
505 {
506 if (GET_CODE (operands[0]) == MEM)
507 operands[1] = force_reg (Pmode, operands[1]);
508 else
509 {
510 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
511 operands[1] = legitimize_pic_address (operands[1], SImode, temp);
512 }
513 }
514 else if (GET_CODE (operands[1]) == CONST
515 && GET_CODE (XEXP (operands[1], 0)) == PLUS
516 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
517 {
518 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
519 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
520 SImode, temp);
521 operands[1] = expand_binop (SImode, add_optab, temp,
522 XEXP (XEXP (operands[1], 0), 1),
523 no_new_pseudos ? temp
524 : gen_reg_rtx (Pmode),
525 0, OPTAB_LIB_WIDEN);
526 }
527 }
528
529 if (! reload_in_progress && ! reload_completed)
530 {
531       /* Copy the source to a register if neither operand is a register.  */
532 if (! register_operand (operands[0], mode)
533 && ! register_operand (operands[1], mode))
534 operands[1] = copy_to_mode_reg (mode, operands[1]);
535
536 /* This case can happen while generating code to move the result
537 of a library call to the target. Reject `st r0,@(rX,rY)' because
538 reload will fail to find a spill register for rX, since r0 is already
539 being used for the source. */
540 else if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 0
541 && GET_CODE (operands[0]) == MEM
542 && GET_CODE (XEXP (operands[0], 0)) == PLUS
543 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
544 operands[1] = copy_to_mode_reg (mode, operands[1]);
545 }
546
547 return 0;
548 }
549
550 /* Prepare the operands for an scc instruction; make sure that the
551 compare has been done. */
552 rtx
553 prepare_scc_operands (code)
554 enum rtx_code code;
555 {
556 rtx t_reg = gen_rtx_REG (SImode, T_REG);
557 enum rtx_code oldcode = code;
558 enum machine_mode mode;
559
560 /* First need a compare insn. */
561 switch (code)
562 {
563 case NE:
564 /* It isn't possible to handle this case. */
565 abort ();
566 case LT:
567 code = GT;
568 break;
569 case LE:
570 code = GE;
571 break;
572 case LTU:
573 code = GTU;
574 break;
575 case LEU:
576 code = GEU;
577 break;
578 default:
579 break;
580 }
581 if (code != oldcode)
582 {
583 rtx tmp = sh_compare_op0;
584 sh_compare_op0 = sh_compare_op1;
585 sh_compare_op1 = tmp;
586 }
587
588 mode = GET_MODE (sh_compare_op0);
589 if (mode == VOIDmode)
590 mode = GET_MODE (sh_compare_op1);
591
592 sh_compare_op0 = force_reg (mode, sh_compare_op0);
593 if ((code != EQ && code != NE
594 && (sh_compare_op1 != const0_rtx
595 || code == GTU || code == GEU || code == LTU || code == LEU))
596 || (mode == DImode && sh_compare_op1 != const0_rtx)
597 || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
598 sh_compare_op1 = force_reg (mode, sh_compare_op1);
599
600 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
601 (mode == SFmode ? emit_sf_insn : emit_df_insn)
602 (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2,
603 gen_rtx (SET, VOIDmode, t_reg,
604 gen_rtx (code, SImode,
605 sh_compare_op0, sh_compare_op1)),
606 gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))));
607 else
608 emit_insn (gen_rtx (SET, VOIDmode, t_reg,
609 gen_rtx (code, SImode, sh_compare_op0,
610 sh_compare_op1)));
611
612 return t_reg;
613 }
614
615 /* Called from the md file to set up the operands of a compare instruction.  */
616
617 void
618 from_compare (operands, code)
619 rtx *operands;
620 int code;
621 {
622 enum machine_mode mode = GET_MODE (sh_compare_op0);
623 rtx insn;
624 if (mode == VOIDmode)
625 mode = GET_MODE (sh_compare_op1);
626 if (code != EQ
627 || mode == DImode
628 || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
629 {
630 /* Force args into regs, since we can't use constants here. */
631 sh_compare_op0 = force_reg (mode, sh_compare_op0);
632 if (sh_compare_op1 != const0_rtx
633 || code == GTU || code == GEU
634 || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
635 sh_compare_op1 = force_reg (mode, sh_compare_op1);
636 }
637 if (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
638 {
639 from_compare (operands, GT);
640 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
641 }
642 else
643 insn = gen_rtx_SET (VOIDmode,
644 gen_rtx_REG (SImode, T_REG),
645 gen_rtx (code, SImode, sh_compare_op0,
646 sh_compare_op1));
647 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
648 {
649 insn = gen_rtx (PARALLEL, VOIDmode,
650 gen_rtvec (2, insn,
651 gen_rtx (USE, VOIDmode, get_fpscr_rtx ())));
652 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
653 }
654 else
655 emit_insn (insn);
656 }
657 \f
658 /* Functions to output assembly code. */
659
660 /* Return a sequence of instructions to perform a DI or DF move.
661
662 Since the SH cannot move a DI or DF in one instruction, we have
663 to take care when we see overlapping source and dest registers. */
664
665 const char *
666 output_movedouble (insn, operands, mode)
667 rtx insn ATTRIBUTE_UNUSED;
668 rtx operands[];
669 enum machine_mode mode;
670 {
671 rtx dst = operands[0];
672 rtx src = operands[1];
673
674 if (GET_CODE (dst) == MEM
675 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
676 return "mov.l %T1,%0\n\tmov.l %1,%0";
677
678 if (register_operand (dst, mode)
679 && register_operand (src, mode))
680 {
681 if (REGNO (src) == MACH_REG)
682 return "sts mach,%S0\n\tsts macl,%R0";
683
684 /* When mov.d r1,r2 do r2->r3 then r1->r2;
685 when mov.d r1,r0 do r1->r0 then r2->r1. */
686
687 if (REGNO (src) + 1 == REGNO (dst))
688 return "mov %T1,%T0\n\tmov %1,%0";
689 else
690 return "mov %1,%0\n\tmov %T1,%T0";
691 }
692 else if (GET_CODE (src) == CONST_INT)
693 {
694 if (INTVAL (src) < 0)
695 output_asm_insn ("mov #-1,%S0", operands);
696 else
697 output_asm_insn ("mov #0,%S0", operands);
698
699 return "mov %1,%R0";
700 }
701 else if (GET_CODE (src) == MEM)
702 {
703 int ptrreg = -1;
704 int dreg = REGNO (dst);
705 rtx inside = XEXP (src, 0);
706
707 if (GET_CODE (inside) == REG)
708 ptrreg = REGNO (inside);
709 else if (GET_CODE (inside) == SUBREG)
710 ptrreg = subreg_regno (inside);
711 else if (GET_CODE (inside) == PLUS)
712 {
713 ptrreg = REGNO (XEXP (inside, 0));
714 /* ??? A r0+REG address shouldn't be possible here, because it isn't
715 an offsettable address. Unfortunately, offsettable addresses use
716 QImode to check the offset, and a QImode offsettable address
717 requires r0 for the other operand, which is not currently
718 supported, so we can't use the 'o' constraint.
719 Thus we must check for and handle r0+REG addresses here.
720 We punt for now, since this is likely very rare. */
721 if (GET_CODE (XEXP (inside, 1)) == REG)
722 abort ();
723 }
724 else if (GET_CODE (inside) == LABEL_REF)
725 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
726 else if (GET_CODE (inside) == POST_INC)
727 return "mov.l %1,%0\n\tmov.l %1,%T0";
728 else
729 abort ();
730
731 /* Work out the safe way to copy. Copy into the second half first. */
732 if (dreg == ptrreg)
733 return "mov.l %T1,%T0\n\tmov.l %1,%0";
734 }
735
736 return "mov.l %1,%0\n\tmov.l %T1,%T0";
737 }
738
739 /* Print an instruction which would have gone into a delay slot after
740 another instruction, but couldn't because the other instruction expanded
741 into a sequence where putting the slot insn at the end wouldn't work. */
742
743 static void
744 print_slot (insn)
745 rtx insn;
746 {
747 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1);
748
749 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
750 }
751
752 const char *
753 output_far_jump (insn, op)
754 rtx insn;
755 rtx op;
756 {
757 struct { rtx lab, reg, op; } this;
758 rtx braf_base_lab = NULL_RTX;
759 const char *jump;
760 int far;
761 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
762
763 this.lab = gen_label_rtx ();
764
765 if (TARGET_SH2
766 && offset >= -32764
767 && offset - get_attr_length (insn) <= 32766)
768 {
769 far = 0;
770 jump = "mov.w %O0,%1; braf %1";
771 }
772 else
773 {
774 far = 1;
775 if (flag_pic)
776 {
777 if (TARGET_SH2)
778 jump = "mov.l %O0,%1; braf %1";
779 else
780 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
781 }
782 else
783 jump = "mov.l %O0,%1; jmp @%1";
784 }
785 /* If we have a scratch register available, use it. */
786 if (GET_CODE (PREV_INSN (insn)) == INSN
787 && INSN_CODE (PREV_INSN (insn)) == CODE_FOR_indirect_jump_scratch)
788 {
789 this.reg = SET_DEST (PATTERN (PREV_INSN (insn)));
790 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
791 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
792 output_asm_insn (jump, &this.lab);
793 if (dbr_sequence_length ())
794 print_slot (final_sequence);
795 else
796 output_asm_insn ("nop", 0);
797 }
798 else
799 {
800 /* Output the delay slot insn first if any. */
801 if (dbr_sequence_length ())
802 print_slot (final_sequence);
803
804 this.reg = gen_rtx_REG (SImode, 13);
805 output_asm_insn ("mov.l r13,@-r15", 0);
806 output_asm_insn (jump, &this.lab);
807 output_asm_insn ("mov.l @r15+,r13", 0);
808 }
809 if (far && flag_pic && TARGET_SH2)
810 {
811 braf_base_lab = gen_label_rtx ();
812 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
813 CODE_LABEL_NUMBER (braf_base_lab));
814 }
815 if (far)
816 output_asm_insn (".align 2", 0);
817 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
818 this.op = op;
819 if (far && flag_pic)
820 {
821 if (TARGET_SH2)
822 this.lab = braf_base_lab;
823 output_asm_insn (".long %O2-%O0", &this.lab);
824 }
825 else
826 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
827 return "";
828 }
829
830 /* Local label counter, used for constants in the pool and inside
831 pattern branches. */
832
833 static int lf = 100;
834
835 /* Output code for ordinary branches. */
836
837 const char *
838 output_branch (logic, insn, operands)
839 int logic;
840 rtx insn;
841 rtx *operands;
842 {
843 switch (get_attr_length (insn))
844 {
845 case 6:
846 /* This can happen if filling the delay slot has caused a forward
847 branch to exceed its range (we could reverse it, but only
848 when we know we won't overextend other branches; this should
849 best be handled by relaxation).
850          It can also happen when other condbranches hoist delay slot insns
851 from their destination, thus leading to code size increase.
852 But the branch will still be in the range -4092..+4098 bytes. */
853
854 if (! TARGET_RELAX)
855 {
856 int label = lf++;
857 /* The call to print_slot will clobber the operands. */
858 rtx op0 = operands[0];
859
860 /* If the instruction in the delay slot is annulled (true), then
861 there is no delay slot where we can put it now. The only safe
862 place for it is after the label. final will do that by default. */
863
864 if (final_sequence
865 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
866 {
867 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
868 ASSEMBLER_DIALECT ? "/" : ".", label);
869 print_slot (final_sequence);
870 }
871 else
872 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
873
874 output_asm_insn ("bra\t%l0", &op0);
875 fprintf (asm_out_file, "\tnop\n");
876 ASM_OUTPUT_INTERNAL_LABEL(asm_out_file, "LF", label);
877
878 return "";
879 }
880 /* When relaxing, handle this like a short branch. The linker
881 will fix it up if it still doesn't fit after relaxation. */
882 case 2:
883 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
884 default:
885 /* There should be no longer branches now - that would
886 indicate that something has destroyed the branches set
887 up in machine_dependent_reorg. */
888 abort ();
889 }
890 }
891
892 const char *
893 output_branchy_insn (code, template, insn, operands)
894 enum rtx_code code;
895 const char *template;
896 rtx insn;
897 rtx *operands;
898 {
899 rtx next_insn = NEXT_INSN (insn);
900
901 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
902 {
903 rtx src = SET_SRC (PATTERN (next_insn));
904 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
905 {
906 /* Following branch not taken */
907 operands[9] = gen_label_rtx ();
908 emit_label_after (operands[9], next_insn);
909 INSN_ADDRESSES_NEW (operands[9],
910 INSN_ADDRESSES (INSN_UID (next_insn))
911 + get_attr_length (next_insn));
912 return template;
913 }
914 else
915 {
916 int offset = (branch_dest (next_insn)
917 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
918 if (offset >= -252 && offset <= 258)
919 {
920 if (GET_CODE (src) == IF_THEN_ELSE)
921 /* branch_true */
922 src = XEXP (src, 1);
923 operands[9] = src;
924 return template;
925 }
926 }
927 }
928 operands[9] = gen_label_rtx ();
929 emit_label_after (operands[9], insn);
930 INSN_ADDRESSES_NEW (operands[9],
931 INSN_ADDRESSES (INSN_UID (insn))
932 + get_attr_length (insn));
933 return template;
934 }
935
936 const char *
937 output_ieee_ccmpeq (insn, operands)
938 rtx insn, *operands;
939 {
940 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
941 }
942 \f
943 /* Output to FILE the start of the assembler file. */
944
945 void
946 output_file_start (file)
947 FILE *file;
948 {
949 output_file_directive (file, main_input_filename);
950
951 /* Switch to the data section so that the coffsem symbol
952 isn't in the text section. */
953 data_section ();
954
955 if (TARGET_LITTLE_ENDIAN)
956 fprintf (file, "\t.little\n");
957 }
958 \f
959 /* Actual number of instructions used to make a shift by N. */
960 static const char ashiftrt_insns[] =
961 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
962
963 /* Left shift and logical right shift are the same. */
964 static const char shift_insns[] =
965 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
966
967 /* Individual shift amounts needed to get the above length sequences.
968 One bit right shifts clobber the T bit, so when possible, put one bit
969 shifts in the middle of the sequence, so the ends are eligible for
970 branch delay slots. */
971 static short shift_amounts[32][5] = {
972 {0}, {1}, {2}, {2, 1},
973 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
974 {8}, {8, 1}, {8, 2}, {8, 1, 2},
975 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
976 {16}, {16, 1}, {16, 2}, {16, 1, 2},
977 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
978 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
979 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
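/* For example, a constant left shift by 13 decomposes as
   shift_amounts[13] = {8, 2, 1, 2} (8 + 2 + 1 + 2 == 13), matching
   shift_insns[13] == 4; this comes out as roughly
   shll8 / shll2 / shll / shll2, with the one-bit shift kept in the middle
   so the first and last insns stay eligible for delay slots.  */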
980
981 /* Likewise, but for shift amounts < 16, up to three highmost bits
982 might be clobbered. This is typically used when combined with some
983 kind of sign or zero extension. */
984
985 static const char ext_shift_insns[] =
986 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
987
988 static const short ext_shift_amounts[32][4] = {
989 {0}, {1}, {2}, {2, 1},
990 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
991 {8}, {8, 1}, {8, 2}, {8, 1, 2},
992 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
993 {16}, {16, 1}, {16, 2}, {16, 1, 2},
994 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
995 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
996 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
997
998 /* Assuming we have a value that has been sign-extended by at least one bit,
999 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1000 to shift it by N without data loss, and quicker than by other means? */
1001 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
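/* ((n) | 8) == 15 holds exactly for n == 7 and n == 15, the shift counts
   whose ext_shift_amounts sequences ({8, -1} and {16, -1}) end in a single
   one-bit right shift that can safely be made arithmetic for a
   sign-extended value.  */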
1002
1003 /* This is used in length attributes in sh.md to help compute the length
1004 of arbitrary constant shift instructions. */
1005
1006 int
1007 shift_insns_rtx (insn)
1008 rtx insn;
1009 {
1010 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1011 int shift_count = INTVAL (XEXP (set_src, 1));
1012 enum rtx_code shift_code = GET_CODE (set_src);
1013
1014 switch (shift_code)
1015 {
1016 case ASHIFTRT:
1017 return ashiftrt_insns[shift_count];
1018 case LSHIFTRT:
1019 case ASHIFT:
1020 return shift_insns[shift_count];
1021 default:
1022 abort();
1023 }
1024 }
1025
1026 /* Return the cost of a shift. */
1027
1028 int
1029 shiftcosts (x)
1030 rtx x;
1031 {
1032 int value;
1033
1034 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1035 {
1036 if (GET_MODE (x) == DImode
1037 && GET_CODE (XEXP (x, 1)) == CONST_INT
1038 && INTVAL (XEXP (x, 1)) == 1)
1039 return 2;
1040
1041 /* Everything else is invalid, because there is no pattern for it. */
1042 return 10000;
1043 }
1044   /* If shifting by a non-constant, then this will be expensive.  */
1045 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1046 return SH_DYNAMIC_SHIFT_COST;
1047
1048 value = INTVAL (XEXP (x, 1));
1049
1050 /* Otherwise, return the true cost in instructions. */
1051 if (GET_CODE (x) == ASHIFTRT)
1052 {
1053 int cost = ashiftrt_insns[value];
1054 /* If SH3, then we put the constant in a reg and use shad. */
1055 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1056 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1057 return cost;
1058 }
1059 else
1060 return shift_insns[value];
1061 }
1062
1063 /* Return the cost of an AND operation. */
1064
1065 int
1066 andcosts (x)
1067 rtx x;
1068 {
1069 int i;
1070
1071   /* ANDing with a register is a single cycle `and' instruction.  */
1072 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1073 return 1;
1074
1075 i = INTVAL (XEXP (x, 1));
1076 /* These constants are single cycle extu.[bw] instructions. */
1077 if (i == 0xff || i == 0xffff)
1078 return 1;
1079   /* Constants that can be used in an and immediate instruction take a single
1080      cycle, but they require r0, so make them a little more expensive.  */
1081 if (CONST_OK_FOR_L (i))
1082 return 2;
1083 /* Constants that can be loaded with a mov immediate and an and.
1084 This case is probably unnecessary. */
1085 if (CONST_OK_FOR_I (i))
1086 return 2;
1087   /* Any other constant requires a 2 cycle pc-relative load plus an and.
1088 This case is probably unnecessary. */
1089 return 3;
1090 }
1091
1092 /* Return the cost of an addition or a subtraction. */
1093
1094 int
1095 addsubcosts (x)
1096 rtx x;
1097 {
1098 /* Adding a register is a single cycle insn. */
1099 if (GET_CODE (XEXP (x, 1)) == REG
1100 || GET_CODE (XEXP (x, 1)) == SUBREG)
1101 return 1;
1102
1103 /* Likewise for small constants. */
1104 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1105 && CONST_OK_FOR_I (INTVAL (XEXP (x, 1))))
1106 return 1;
1107
1108 /* Any other constant requires a 2 cycle pc-relative load plus an
1109 addition. */
1110 return 3;
1111 }
1112
1113 /* Return the cost of a multiply. */
1114 int
1115 multcosts (x)
1116 rtx x ATTRIBUTE_UNUSED;
1117 {
1118 if (TARGET_SH2)
1119 {
1120 /* We have a mul insn, so we can never take more than the mul and the
1121 read of the mac reg, but count more because of the latency and extra
1122 reg usage. */
1123 if (TARGET_SMALLCODE)
1124 return 2;
1125 return 3;
1126 }
1127
1128 /* If we're aiming at small code, then just count the number of
1129 insns in a multiply call sequence. */
1130 if (TARGET_SMALLCODE)
1131 return 5;
1132
1133 /* Otherwise count all the insns in the routine we'd be calling too. */
1134 return 20;
1135 }
1136
1137 /* Code to expand a shift. */
1138
1139 void
1140 gen_ashift (type, n, reg)
1141 int type;
1142 int n;
1143 rtx reg;
1144 {
1145 /* Negative values here come from the shift_amounts array. */
1146 if (n < 0)
1147 {
1148 if (type == ASHIFT)
1149 type = LSHIFTRT;
1150 else
1151 type = ASHIFT;
1152 n = -n;
1153 }
1154
1155 switch (type)
1156 {
1157 case ASHIFTRT:
1158 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1159 break;
1160 case LSHIFTRT:
1161 if (n == 1)
1162 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1163 else
1164 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1165 break;
1166 case ASHIFT:
1167 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1168 break;
1169 }
1170 }
1171
1172 /* Same for HImode */
1173
1174 void
1175 gen_ashift_hi (type, n, reg)
1176 int type;
1177 int n;
1178 rtx reg;
1179 {
1180 /* Negative values here come from the shift_amounts array. */
1181 if (n < 0)
1182 {
1183 if (type == ASHIFT)
1184 type = LSHIFTRT;
1185 else
1186 type = ASHIFT;
1187 n = -n;
1188 }
1189
1190 switch (type)
1191 {
1192 case ASHIFTRT:
1193 case LSHIFTRT:
1194 /* We don't have HImode right shift operations because using the
1195 ordinary 32 bit shift instructions for that doesn't generate proper
1196 zero/sign extension.
1197 gen_ashift_hi is only called in contexts where we know that the
1198 sign extension works out correctly. */
1199 {
1200 int offset = 0;
1201 if (GET_CODE (reg) == SUBREG)
1202 {
1203 offset = SUBREG_BYTE (reg);
1204 reg = SUBREG_REG (reg);
1205 }
1206 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1207 break;
1208 }
1209 case ASHIFT:
1210 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1211 break;
1212 }
1213 }
1214
1215 /* Output RTL to split a constant shift into its component SH constant
1216 shift instructions. */
1217
1218 void
1219 gen_shifty_op (code, operands)
1220 int code;
1221 rtx *operands;
1222 {
1223 int value = INTVAL (operands[2]);
1224 int max, i;
1225
1226 /* Truncate the shift count in case it is out of bounds. */
1227 value = value & 0x1f;
1228
1229 if (value == 31)
1230 {
1231 if (code == LSHIFTRT)
1232 {
1233 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1234 emit_insn (gen_movt (operands[0]));
1235 return;
1236 }
1237 else if (code == ASHIFT)
1238 {
1239 /* There is a two instruction sequence for 31 bit left shifts,
1240 but it requires r0. */
1241 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
1242 {
1243 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
1244 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
1245 return;
1246 }
1247 }
1248 }
1249 else if (value == 0)
1250 {
1251 /* This can happen when not optimizing. We must output something here
1252 to prevent the compiler from aborting in final.c after the try_split
1253 call. */
1254 emit_insn (gen_nop ());
1255 return;
1256 }
1257
1258 max = shift_insns[value];
1259 for (i = 0; i < max; i++)
1260 gen_ashift (code, shift_amounts[value][i], operands[0]);
1261 }
1262
1263 /* Same as above, but optimized for values where the topmost bits don't
1264 matter. */
1265
1266 void
1267 gen_shifty_hi_op (code, operands)
1268 int code;
1269 rtx *operands;
1270 {
1271 int value = INTVAL (operands[2]);
1272 int max, i;
1273 void (*gen_fun) PARAMS ((int, int, rtx));
1274
1275 /* This operation is used by and_shl for SImode values with a few
1276 high bits known to be cleared. */
1277 value &= 31;
1278 if (value == 0)
1279 {
1280 emit_insn (gen_nop ());
1281 return;
1282 }
1283
1284 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
1285 if (code == ASHIFT)
1286 {
1287 max = ext_shift_insns[value];
1288 for (i = 0; i < max; i++)
1289 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1290 }
1291 else
1292 /* When shifting right, emit the shifts in reverse order, so that
1293 solitary negative values come first. */
1294 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
1295 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1296 }
1297
1298 /* Output RTL for an arithmetic right shift. */
1299
1300 /* ??? Rewrite to use super-optimizer sequences. */
1301
1302 int
1303 expand_ashiftrt (operands)
1304 rtx *operands;
1305 {
1306 rtx sym;
1307 rtx wrk;
1308 char func[18];
1309 tree func_name;
1310 int value;
1311
1312 if (TARGET_SH3)
1313 {
1314 if (GET_CODE (operands[2]) != CONST_INT)
1315 {
1316 rtx count = copy_to_mode_reg (SImode, operands[2]);
1317 emit_insn (gen_negsi2 (count, count));
1318 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1319 return 1;
1320 }
1321 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
1322 > 1 + SH_DYNAMIC_SHIFT_COST)
1323 {
1324 rtx count
1325 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
1326 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1327 return 1;
1328 }
1329 }
1330 if (GET_CODE (operands[2]) != CONST_INT)
1331 return 0;
1332
1333 value = INTVAL (operands[2]) & 31;
1334
1335 if (value == 31)
1336 {
1337 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
1338 return 1;
1339 }
1340 else if (value >= 16 && value <= 19)
1341 {
1342 wrk = gen_reg_rtx (SImode);
1343 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
1344 value -= 16;
1345 while (value--)
1346 gen_ashift (ASHIFTRT, 1, wrk);
1347 emit_move_insn (operands[0], wrk);
1348 return 1;
1349 }
1350   /* Expand a short sequence inline; for longer shifts, call a magic routine.  */
1351 else if (value <= 5)
1352 {
1353 wrk = gen_reg_rtx (SImode);
1354 emit_move_insn (wrk, operands[1]);
1355 while (value--)
1356 gen_ashift (ASHIFTRT, 1, wrk);
1357 emit_move_insn (operands[0], wrk);
1358 return 1;
1359 }
1360
1361 wrk = gen_reg_rtx (Pmode);
1362
1363 /* Load the value into an arg reg and call a helper. */
1364 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
1365 sprintf (func, "__ashiftrt_r4_%d", value);
1366 func_name = get_identifier (func);
1367 sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (func_name));
1368 emit_move_insn (wrk, sym);
1369 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
1370 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
1371 return 1;
1372 }
1373
1374 int
1375 sh_dynamicalize_shift_p (count)
1376 rtx count;
1377 {
1378 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
1379 }
1380
1381 /* Try to find a good way to implement the combiner pattern
1382 [(set (match_operand:SI 0 "register_operand" "r")
1383 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1384 (match_operand:SI 2 "const_int_operand" "n"))
1385 (match_operand:SI 3 "const_int_operand" "n"))) .
1386 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
1387 return 0 for simple right / left or left/right shift combination.
1388 return 1 for a combination of shifts with zero_extend.
1389 return 2 for a combination of shifts with an AND that needs r0.
1390 return 3 for a combination of shifts with an AND that needs an extra
1391 scratch register, when the three highmost bits of the AND mask are clear.
1392 return 4 for a combination of shifts with an AND that needs an extra
1393 scratch register, when any of the three highmost bits of the AND mask
1394 is set.
1395 If ATTRP is set, store an initial right shift width in ATTRP[0],
1396 and the instruction length in ATTRP[1] . These values are not valid
1397 when returning 0.
1398 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
1399 shift_amounts for the last shift value that is to be used before the
1400 sign extend. */
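/* For example, with LEFT_RTX == 2 and MASK_RTX == 0x3fc the masked shift
   is (x & 0xff) << 2, so the zero-extend alternative wins: the function
   returns 1 with a length of 2, i.e. an extu.b followed by a two bit
   left shift.  */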
1401 int
1402 shl_and_kind (left_rtx, mask_rtx, attrp)
1403 rtx left_rtx, mask_rtx;
1404 int *attrp;
1405 {
1406 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
1407 int left = INTVAL (left_rtx), right;
1408 int best = 0;
1409 int cost, best_cost = 10000;
1410 int best_right = 0, best_len = 0;
1411 int i;
1412 int can_ext;
1413
1414 if (left < 0 || left > 31)
1415 return 0;
1416 if (GET_CODE (mask_rtx) == CONST_INT)
1417 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
1418 else
1419 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
1420   /* Can this be expressed as a right shift / left shift pair?  */
1421 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
1422 right = exact_log2 (lsb);
1423 mask2 = ~(mask + lsb - 1);
1424 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
1425   /* mask has no zeroes other than trailing zeroes <==> ! mask2 */
1426 if (! mask2)
1427 best_cost = shift_insns[right] + shift_insns[right + left];
1428 /* mask has no trailing zeroes <==> ! right */
1429 else if (! right && mask2 == ~(lsb2 - 1))
1430 {
1431 int late_right = exact_log2 (lsb2);
1432 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
1433 }
1434 /* Try to use zero extend */
1435 if (mask2 == ~(lsb2 - 1))
1436 {
1437 int width, first;
1438
1439 for (width = 8; width <= 16; width += 8)
1440 {
1441 /* Can we zero-extend right away? */
1442 if (lsb2 == (unsigned HOST_WIDE_INT)1 << width)
1443 {
1444 cost
1445 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
1446 if (cost < best_cost)
1447 {
1448 best = 1;
1449 best_cost = cost;
1450 best_right = right;
1451 best_len = cost;
1452 if (attrp)
1453 attrp[2] = -1;
1454 }
1455 continue;
1456 }
1457 /* ??? Could try to put zero extend into initial right shift,
1458 or even shift a bit left before the right shift. */
1459 /* Determine value of first part of left shift, to get to the
1460 zero extend cut-off point. */
1461 first = width - exact_log2 (lsb2) + right;
1462 if (first >= 0 && right + left - first >= 0)
1463 {
1464 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
1465 + ext_shift_insns[right + left - first];
1466 if (cost < best_cost)
1467 {
1468 best = 1;
1469 best_cost = cost;
1470 best_right = right;
1471 best_len = cost;
1472 if (attrp)
1473 attrp[2] = first;
1474 }
1475 }
1476 }
1477 }
1478 /* Try to use r0 AND pattern */
1479 for (i = 0; i <= 2; i++)
1480 {
1481 if (i > right)
1482 break;
1483 if (! CONST_OK_FOR_L (mask >> i))
1484 continue;
1485 cost = (i != 0) + 2 + ext_shift_insns[left + i];
1486 if (cost < best_cost)
1487 {
1488 best = 2;
1489 best_cost = cost;
1490 best_right = i;
1491 best_len = cost - 1;
1492 }
1493 }
1494 /* Try to use a scratch register to hold the AND operand. */
1495 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT)3 << 30)) == 0;
1496 for (i = 0; i <= 2; i++)
1497 {
1498 if (i > right)
1499 break;
1500 cost = (i != 0) + (CONST_OK_FOR_I (mask >> i) ? 2 : 3)
1501 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
1502 if (cost < best_cost)
1503 {
1504 best = 4 - can_ext;
1505 best_cost = cost;
1506 best_right = i;
1507 best_len = cost - 1 - ! CONST_OK_FOR_I (mask >> i);
1508 }
1509 }
1510
1511 if (attrp)
1512 {
1513 attrp[0] = best_right;
1514 attrp[1] = best_len;
1515 }
1516 return best;
1517 }
1518
1519 /* This is used in length attributes of the unnamed instructions
1520 corresponding to shl_and_kind return values of 1 and 2. */
1521 int
1522 shl_and_length (insn)
1523 rtx insn;
1524 {
1525 rtx set_src, left_rtx, mask_rtx;
1526 int attributes[3];
1527
1528 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1529 left_rtx = XEXP (XEXP (set_src, 0), 1);
1530 mask_rtx = XEXP (set_src, 1);
1531 shl_and_kind (left_rtx, mask_rtx, attributes);
1532 return attributes[1];
1533 }
1534
1535 /* This is used in the length attribute of the and_shl_scratch instruction.  */
1536
1537 int
1538 shl_and_scr_length (insn)
1539 rtx insn;
1540 {
1541 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1542 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
1543 rtx op = XEXP (set_src, 0);
1544 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
1545 op = XEXP (XEXP (op, 0), 0);
1546 return len + shift_insns[INTVAL (XEXP (op, 1))];
1547 }
1548
1549 /* Generating rtl? */
1550 extern int rtx_equal_function_value_matters;
1551
1552 /* Generate rtl for instructions for which shl_and_kind advised a particular
1553    method of generating them, i.e. returned nonzero.  */
1554
1555 int
1556 gen_shl_and (dest, left_rtx, mask_rtx, source)
1557 rtx dest, left_rtx, mask_rtx, source;
1558 {
1559 int attributes[3];
1560 unsigned HOST_WIDE_INT mask;
1561 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
1562 int right, total_shift;
1563 void (*shift_gen_fun) PARAMS ((int, rtx*)) = gen_shifty_hi_op;
1564
1565 right = attributes[0];
1566 total_shift = INTVAL (left_rtx) + right;
1567 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
1568 switch (kind)
1569 {
1570 default:
1571 return -1;
1572 case 1:
1573 {
1574 int first = attributes[2];
1575 rtx operands[3];
1576
1577 if (first < 0)
1578 {
1579 emit_insn ((mask << right) <= 0xff
1580 ? gen_zero_extendqisi2(dest,
1581 gen_lowpart (QImode, source))
1582 : gen_zero_extendhisi2(dest,
1583 gen_lowpart (HImode, source)));
1584 source = dest;
1585 }
1586 if (source != dest)
1587 emit_insn (gen_movsi (dest, source));
1588 operands[0] = dest;
1589 if (right)
1590 {
1591 operands[2] = GEN_INT (right);
1592 gen_shifty_hi_op (LSHIFTRT, operands);
1593 }
1594 if (first > 0)
1595 {
1596 operands[2] = GEN_INT (first);
1597 gen_shifty_hi_op (ASHIFT, operands);
1598 total_shift -= first;
1599 mask <<= first;
1600 }
1601 if (first >= 0)
1602 emit_insn (mask <= 0xff
1603 ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest))
1604 : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest)));
1605 if (total_shift > 0)
1606 {
1607 operands[2] = GEN_INT (total_shift);
1608 gen_shifty_hi_op (ASHIFT, operands);
1609 }
1610 break;
1611 }
1612 case 4:
1613 shift_gen_fun = gen_shifty_op;
1614 case 3:
1615 /* If the topmost bit that matters is set, set the topmost bits
1616 that don't matter. This way, we might be able to get a shorter
1617 signed constant. */
1618 if (mask & ((HOST_WIDE_INT)1 << (31 - total_shift)))
1619 mask |= (HOST_WIDE_INT)~0 << (31 - total_shift);
1620 case 2:
1621 /* Don't expand fine-grained when combining, because that will
1622 make the pattern fail. */
1623 if (rtx_equal_function_value_matters
1624 || reload_in_progress || reload_completed)
1625 {
1626 rtx operands[3];
1627
1628 /* Cases 3 and 4 should be handled by this split
1629 only while combining */
1630 if (kind > 2)
1631 abort ();
1632 if (right)
1633 {
1634 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
1635 source = dest;
1636 }
1637 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
1638 if (total_shift)
1639 {
1640 operands[0] = dest;
1641 operands[1] = dest;
1642 operands[2] = GEN_INT (total_shift);
1643 shift_gen_fun (ASHIFT, operands);
1644 }
1645 break;
1646 }
1647 else
1648 {
1649 int neg = 0;
1650 if (kind != 4 && total_shift < 16)
1651 {
1652 neg = -ext_shift_amounts[total_shift][1];
1653 if (neg > 0)
1654 neg -= ext_shift_amounts[total_shift][2];
1655 else
1656 neg = 0;
1657 }
1658 emit_insn (gen_and_shl_scratch (dest, source,
1659 GEN_INT (right),
1660 GEN_INT (mask),
1661 GEN_INT (total_shift + neg),
1662 GEN_INT (neg)));
1663 emit_insn (gen_movsi (dest, dest));
1664 break;
1665 }
1666 }
1667 return 0;
1668 }
1669
1670 /* Try to find a good way to implement the combiner pattern
1671 [(set (match_operand:SI 0 "register_operand" "=r")
1672 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1673 (match_operand:SI 2 "const_int_operand" "n")
1674 (match_operand:SI 3 "const_int_operand" "n")
1675 (const_int 0)))
1676 (clobber (reg:SI T_REG))]
1677 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
1678 return 0 for simple left / right shift combination.
1679 return 1 for left shift / 8 bit sign extend / left shift.
1680 return 2 for left shift / 16 bit sign extend / left shift.
1681 return 3 for left shift / 8 bit sign extend / shift / sign extend.
1682 return 4 for left shift / 16 bit sign extend / shift / sign extend.
1683    return 5 for left shift / 16 bit sign extend / right shift.
1684 return 6 for < 8 bit sign extend / left shift.
1685 return 7 for < 8 bit sign extend / left shift / single right shift.
1686 If COSTP is nonzero, assign the calculated cost to *COSTP. */
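/* For example, LEFT_RTX == 8 and SIZE_RTX == 16 give insize == 8; a left
   shift by 8 followed by a 16 bit sign extend (roughly shll8 then exts.w)
   already produces the result, so kind 5 is chosen at a cost of 2 insns.  */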
1687
1688 int
1689 shl_sext_kind (left_rtx, size_rtx, costp)
1690 rtx left_rtx, size_rtx;
1691 int *costp;
1692 {
1693 int left, size, insize, ext;
1694 int cost, best_cost;
1695 int kind;
1696
1697 left = INTVAL (left_rtx);
1698 size = INTVAL (size_rtx);
1699 insize = size - left;
1700 if (insize <= 0)
1701 abort ();
1702 /* Default to left / right shift. */
1703 kind = 0;
1704 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
1705 if (size <= 16)
1706 {
1707 /* 16 bit shift / sign extend / 16 bit shift */
1708 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
1709 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
1710 below, by alternative 3 or something even better. */
1711 if (cost < best_cost)
1712 {
1713 kind = 5;
1714 best_cost = cost;
1715 }
1716 }
1717 /* Try a plain sign extend between two shifts. */
1718 for (ext = 16; ext >= insize; ext -= 8)
1719 {
1720 if (ext <= size)
1721 {
1722 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
1723 if (cost < best_cost)
1724 {
1725 kind = ext / (unsigned) 8;
1726 best_cost = cost;
1727 }
1728 }
1729 /* Check if we can do a sloppy shift with a final signed shift
1730 restoring the sign. */
1731 if (EXT_SHIFT_SIGNED (size - ext))
1732 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
1733 /* If not, maybe it's still cheaper to do the second shift sloppy,
1734 and do a final sign extend? */
1735 else if (size <= 16)
1736 cost = ext_shift_insns[ext - insize] + 1
1737 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
1738 else
1739 continue;
1740 if (cost < best_cost)
1741 {
1742 kind = ext / (unsigned) 8 + 2;
1743 best_cost = cost;
1744 }
1745 }
1746 /* Check if we can sign extend in r0 */
1747 if (insize < 8)
1748 {
1749 cost = 3 + shift_insns[left];
1750 if (cost < best_cost)
1751 {
1752 kind = 6;
1753 best_cost = cost;
1754 }
1755 /* Try the same with a final signed shift. */
1756 if (left < 31)
1757 {
1758 cost = 3 + ext_shift_insns[left + 1] + 1;
1759 if (cost < best_cost)
1760 {
1761 kind = 7;
1762 best_cost = cost;
1763 }
1764 }
1765 }
1766 if (TARGET_SH3)
1767 {
1768 /* Try to use a dynamic shift. */
1769 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
1770 if (cost < best_cost)
1771 {
1772 kind = 0;
1773 best_cost = cost;
1774 }
1775 }
1776 if (costp)
1777 *costp = cost;
1778 return kind;
1779 }
1780
1781 /* Function to be used in the length attribute of the instructions
1782 implementing this pattern. */
1783
1784 int
1785 shl_sext_length (insn)
1786 rtx insn;
1787 {
1788 rtx set_src, left_rtx, size_rtx;
1789 int cost;
1790
1791 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1792 left_rtx = XEXP (XEXP (set_src, 0), 1);
1793 size_rtx = XEXP (set_src, 1);
1794 shl_sext_kind (left_rtx, size_rtx, &cost);
1795 return cost;
1796 }
1797
1798 /* Generate rtl for this pattern */
1799
1800 int
1801 gen_shl_sext (dest, left_rtx, size_rtx, source)
1802 rtx dest, left_rtx, size_rtx, source;
1803 {
1804 int kind;
1805 int left, size, insize, cost;
1806 rtx operands[3];
1807
1808 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
1809 left = INTVAL (left_rtx);
1810 size = INTVAL (size_rtx);
1811 insize = size - left;
1812 switch (kind)
1813 {
1814 case 1:
1815 case 2:
1816 case 3:
1817 case 4:
1818 {
1819 int ext = kind & 1 ? 8 : 16;
1820 int shift2 = size - ext;
1821
1822 /* Don't expand fine-grained when combining, because that will
1823 make the pattern fail. */
1824 if (! rtx_equal_function_value_matters
1825 && ! reload_in_progress && ! reload_completed)
1826 {
1827 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
1828 emit_insn (gen_movsi (dest, source));
1829 break;
1830 }
1831 if (dest != source)
1832 emit_insn (gen_movsi (dest, source));
1833 operands[0] = dest;
1834 if (ext - insize)
1835 {
1836 operands[2] = GEN_INT (ext - insize);
1837 gen_shifty_hi_op (ASHIFT, operands);
1838 }
1839 emit_insn (kind & 1
1840 ? gen_extendqisi2(dest, gen_lowpart (QImode, dest))
1841 : gen_extendhisi2(dest, gen_lowpart (HImode, dest)));
1842 if (kind <= 2)
1843 {
1844 if (shift2)
1845 {
1846 operands[2] = GEN_INT (shift2);
1847 gen_shifty_op (ASHIFT, operands);
1848 }
1849 }
1850 else
1851 {
1852 if (shift2 > 0)
1853 {
1854 if (EXT_SHIFT_SIGNED (shift2))
1855 {
1856 operands[2] = GEN_INT (shift2 + 1);
1857 gen_shifty_op (ASHIFT, operands);
1858 operands[2] = GEN_INT (1);
1859 gen_shifty_op (ASHIFTRT, operands);
1860 break;
1861 }
1862 operands[2] = GEN_INT (shift2);
1863 gen_shifty_hi_op (ASHIFT, operands);
1864 }
1865 else if (shift2)
1866 {
1867 operands[2] = GEN_INT (-shift2);
1868 gen_shifty_hi_op (LSHIFTRT, operands);
1869 }
1870 emit_insn (size <= 8
1871 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
1872 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
1873 }
1874 break;
1875 }
1876 case 5:
1877 {
1878 int i = 16 - size;
1879 if (! rtx_equal_function_value_matters
1880 && ! reload_in_progress && ! reload_completed)
1881 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
1882 else
1883 {
1884 operands[0] = dest;
1885 operands[2] = GEN_INT (16 - insize);
1886 gen_shifty_hi_op (ASHIFT, operands);
1887 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
1888 }
1889 /* Don't use gen_ashrsi3 because it generates new pseudos. */
1890 while (--i >= 0)
1891 gen_ashift (ASHIFTRT, 1, dest);
1892 break;
1893 }
1894 case 6:
1895 case 7:
1896 /* Don't expand fine-grained when combining, because that will
1897 make the pattern fail. */
1898 if (! rtx_equal_function_value_matters
1899 && ! reload_in_progress && ! reload_completed)
1900 {
1901 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
1902 emit_insn (gen_movsi (dest, source));
1903 break;
1904 }
1905 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
1906 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
1907 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
1908 operands[0] = dest;
1909 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
1910 gen_shifty_op (ASHIFT, operands);
1911 if (kind == 7)
1912 emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1)));
1913 break;
1914 default:
1915 return -1;
1916 }
1917 return 0;
1918 }
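
/* A worked example of the expansion above (the operand values are
   hypothetical and assume shl_sext_kind classifies them as kind 5):
   with left == 8 and size == 13, insize is 5, so the reload-time path
   shifts dest left by 16 - insize == 11 (gen_shifty_hi_op), sign
   extends the low 16 bits (gen_extendhisi2), and then emits
   16 - size == 3 single-bit arithmetic right shifts via gen_ashift.  */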
1919 \f
1920 /* The SH cannot load a large constant into a register, constants have to
1921 come from a pc relative load. The reference of a pc relative load
1922 instruction must be less than 1k in front of the instruction. This
1923 means that we often have to dump a constant inside a function, and
1924 generate code to branch around it.
1925
1926 It is important to minimize this, since the branches will slow things
1927 down and make things bigger.
1928
1929 Worst case code looks like:
1930
1931 mov.l L1,rn
1932 bra L2
1933 nop
1934 align
1935 L1: .long value
1936 L2:
1937 ..
1938
1939 mov.l L3,rn
1940 bra L4
1941 nop
1942 align
1943 L3: .long value
1944 L4:
1945 ..
1946
1947 We fix this by performing a scan before scheduling, which notices which
1948 instructions need to have their operands fetched from the constant table
1949 and builds the table.
1950
1951 The algorithm is:
1952
1953 Scan to find an instruction which needs a pcrel move. Look forward to find
1954 the last barrier which is within MAX_COUNT bytes of the requirement.
1955 If there isn't one, make one. Process all the instructions between
1956 the found instruction and the barrier.
1957
1958 In the above example, we can tell that L3 is within 1k of L1, so
1959 the first move can be shrunk from the 3 insn+constant sequence into
1960 just 1 insn, and the constant moved to L3 to make:
1961
1962 mov.l L1,rn
1963 ..
1964 mov.l L3,rn
1965 bra L4
1966 nop
1967 align
1968 L3:.long value
1969 L4:.long value
1970
1971 Then the second move becomes the target for the shortening process. */
1972
1973 typedef struct
1974 {
1975 rtx value; /* Value in table. */
1976 rtx label; /* Label of value. */
1977 rtx wend; /* End of window. */
1978 enum machine_mode mode; /* Mode of value. */
1979 } pool_node;
1980
1981 /* The maximum number of constants that can fit into one pool, since
1982 the pc relative range is 0...1020 bytes and constants are at least 4
1983 bytes long. */
1984
1985 #define MAX_POOL_SIZE (1020/4)
1986 static pool_node pool_vector[MAX_POOL_SIZE];
1987 static int pool_size;
1988 static rtx pool_window_label;
1989 static int pool_window_last;
1990
1991 /* ??? If we need a constant in HImode which is the truncated value of a
1992 constant we need in SImode, we could combine the two entries thus saving
1993 two bytes. Is this common enough to be worth the effort of implementing
1994 it? */
1995
1996 /* ??? This stuff should be done at the same time that we shorten branches.
1997 As it is now, we must assume that all branches are the maximum size, and
1998 this causes us to almost always output constant pools sooner than
1999 necessary. */
2000
2001 /* Add a constant to the pool and return its label. */
2002
2003 static rtx
2004 add_constant (x, mode, last_value)
2005 rtx x;
2006 enum machine_mode mode;
2007 rtx last_value;
2008 {
2009 int i;
2010 rtx lab, new, ref, newref;
2011
2012 /* First see if we've already got it. */
2013 for (i = 0; i < pool_size; i++)
2014 {
2015 if (x->code == pool_vector[i].value->code
2016 && mode == pool_vector[i].mode)
2017 {
2018 if (x->code == CODE_LABEL)
2019 {
2020 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2021 continue;
2022 }
2023 if (rtx_equal_p (x, pool_vector[i].value))
2024 {
2025 lab = new = 0;
2026 if (! last_value
2027 || ! i
2028 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2029 {
2030 new = gen_label_rtx ();
2031 LABEL_REFS (new) = pool_vector[i].label;
2032 pool_vector[i].label = lab = new;
2033 }
2034 if (lab && pool_window_label)
2035 {
2036 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2037 ref = pool_vector[pool_window_last].wend;
2038 LABEL_NEXTREF (newref) = ref;
2039 pool_vector[pool_window_last].wend = newref;
2040 }
2041 if (new)
2042 pool_window_label = new;
2043 pool_window_last = i;
2044 return lab;
2045 }
2046 }
2047 }
2048
2049 /* Need a new one. */
2050 pool_vector[pool_size].value = x;
2051 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2052 lab = 0;
2053 else
2054 lab = gen_label_rtx ();
2055 pool_vector[pool_size].mode = mode;
2056 pool_vector[pool_size].label = lab;
2057 pool_vector[pool_size].wend = NULL_RTX;
2058 if (lab && pool_window_label)
2059 {
2060 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2061 ref = pool_vector[pool_window_last].wend;
2062 LABEL_NEXTREF (newref) = ref;
2063 pool_vector[pool_window_last].wend = newref;
2064 }
2065 if (lab)
2066 pool_window_label = lab;
2067 pool_window_last = pool_size;
2068 pool_size++;
2069 return lab;
2070 }
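
/* A sketch of the typical use, as done by machine_dependent_reorg
   further below when it rewrites a broken move (SRC and MODE stand for
   the original move's source and mode):

	lab = add_constant (src, mode, 0);
	newsrc = gen_rtx_MEM (mode, gen_rtx_LABEL_REF (VOIDmode, lab));

   The returned label is emitted in front of the pooled constant by
   dump_table.  The result can be zero when the constant can share the
   address of the immediately preceding pool entry (see the LAST_VALUE
   handling above).  */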
2071
2072 /* Output the literal table. */
2073
2074 static void
2075 dump_table (scan)
2076 rtx scan;
2077 {
2078 int i;
2079 int need_align = 1;
2080 rtx lab, ref;
2081
2082 /* Do two passes, first time dump out the HI sized constants. */
2083
2084 for (i = 0; i < pool_size; i++)
2085 {
2086 pool_node *p = &pool_vector[i];
2087
2088 if (p->mode == HImode)
2089 {
2090 if (need_align)
2091 {
2092 scan = emit_insn_after (gen_align_2 (), scan);
2093 need_align = 0;
2094 }
2095 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2096 scan = emit_label_after (lab, scan);
2097 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2098 scan);
2099 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2100 {
2101 lab = XEXP (ref, 0);
2102 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2103 }
2104 }
2105 }
2106
2107 need_align = 1;
2108
2109 for (i = 0; i < pool_size; i++)
2110 {
2111 pool_node *p = &pool_vector[i];
2112
2113 switch (p->mode)
2114 {
2115 case HImode:
2116 break;
2117 case SImode:
2118 case SFmode:
2119 if (need_align)
2120 {
2121 need_align = 0;
2122 scan = emit_label_after (gen_label_rtx (), scan);
2123 scan = emit_insn_after (gen_align_4 (), scan);
2124 }
2125 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2126 scan = emit_label_after (lab, scan);
2127 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2128 scan);
2129 break;
2130 case DFmode:
2131 case DImode:
2132 if (need_align)
2133 {
2134 need_align = 0;
2135 scan = emit_label_after (gen_label_rtx (), scan);
2136 scan = emit_insn_after (gen_align_4 (), scan);
2137 }
2138 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2139 scan = emit_label_after (lab, scan);
2140 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2141 scan);
2142 break;
2143 default:
2144 abort ();
2145 break;
2146 }
2147
2148 if (p->mode != HImode)
2149 {
2150 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2151 {
2152 lab = XEXP (ref, 0);
2153 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2154 }
2155 }
2156 }
2157
2158 scan = emit_insn_after (gen_consttable_end (), scan);
2159 scan = emit_barrier_after (scan);
2160 pool_size = 0;
2161 pool_window_label = NULL_RTX;
2162 pool_window_last = 0;
2163 }
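
/* Schematically, for a pool holding one HImode and one SImode constant,
   the sequence emitted after SCAN is (pattern names, not literal
   assembly):

	align_2
   Lhi:	consttable_2 <value>
   Lnew:	align_4
   Lsi:	consttable_4 <value>
	consttable_end
	<barrier>

   plus a consttable_window_end insn for each label recorded on the
   wend chains.  */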
2164
2165 /* Return non-zero if the constant would be an ok source for a
2166 mov.w instead of a mov.l. */
2167
2168 static int
2169 hi_const (src)
2170 rtx src;
2171 {
2172 return (GET_CODE (src) == CONST_INT
2173 && INTVAL (src) >= -32768
2174 && INTVAL (src) <= 32767);
2175 }
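
/* For example, hi_const (GEN_INT (1000)) is non-zero while
   hi_const (GEN_INT (0x12345)) and hi_const of a SYMBOL_REF are zero:
   only CONST_INTs in the signed 16-bit range can be fetched with a
   sign-extending mov.w from the pool.  (Illustrative values only.)  */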
2176
2177 /* Non-zero if the insn is a move instruction which needs to be fixed. */
2178
2179 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
2180 CONST_DOUBLE input value is CONST_OK_FOR_I. For an SFmode move, we don't
2181 need to fix it if the input value is CONST_OK_FOR_I. */
2182
2183 static int
2184 broken_move (insn)
2185 rtx insn;
2186 {
2187 if (GET_CODE (insn) == INSN)
2188 {
2189 rtx pat = PATTERN (insn);
2190 if (GET_CODE (pat) == PARALLEL)
2191 pat = XVECEXP (pat, 0, 0);
2192 if (GET_CODE (pat) == SET
2193 /* We can load any 8 bit value if we don't care what the high
2194 order bits end up as. */
2195 && GET_MODE (SET_DEST (pat)) != QImode
2196 && (CONSTANT_P (SET_SRC (pat))
2197 /* Match mova_const. */
2198 || (GET_CODE (SET_SRC (pat)) == UNSPEC
2199 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
2200 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
2201 && ! (TARGET_SH3E
2202 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
2203 && (fp_zero_operand (SET_SRC (pat))
2204 || fp_one_operand (SET_SRC (pat)))
2205 /* ??? If this is a -m4 or -m4-single compilation, we don't
2206 know the current setting of fpscr, so disable fldi. */
2207 && (! TARGET_SH4 || TARGET_FMOVD)
2208 && GET_CODE (SET_DEST (pat)) == REG
2209 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
2210 && (GET_CODE (SET_SRC (pat)) != CONST_INT
2211 || ! CONST_OK_FOR_I (INTVAL (SET_SRC (pat)))))
2212 return 1;
2213 }
2214
2215 return 0;
2216 }
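
/* Two schematic cases (illustrative RTL, not from a real dump):
   (set (reg:SI 1) (const_int 0x12345)) is broken and must become a
   pc-relative load, since the constant is outside the CONST_OK_FOR_I
   range, whereas (set (reg:SI 1) (const_int 10)) is not, and neither is
   any move with a QImode destination, because an 8 bit value can be
   loaded directly.  */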
2217
2218 static int
2219 mova_p (insn)
2220 rtx insn;
2221 {
2222 return (GET_CODE (insn) == INSN
2223 && GET_CODE (PATTERN (insn)) == SET
2224 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
2225 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
2226 /* Don't match mova_const. */
2227 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
2228 }
2229
2230 /* Find the last barrier from insn FROM which is close enough to hold the
2231 constant pool. If we can't find one, then create one near the end of
2232 the range. */
2233
2234 static rtx
2235 find_barrier (num_mova, mova, from)
2236 int num_mova;
2237 rtx mova, from;
2238 {
2239 int count_si = 0;
2240 int count_hi = 0;
2241 int found_hi = 0;
2242 int found_si = 0;
2243 int hi_align = 2;
2244 int si_align = 2;
2245 int leading_mova = num_mova;
2246 rtx barrier_before_mova, found_barrier = 0, good_barrier = 0;
2247 int si_limit;
2248 int hi_limit;
2249
2250 /* For HImode: range is 510, add 4 because pc counts from address of
2251 second instruction after this one, subtract 2 for the jump instruction
2252 that we may need to emit before the table, subtract 2 for the instruction
2253 that fills the jump delay slot (in very rare cases, reorg will take an
2254 instruction from after the constant pool or will leave the delay slot
2255 empty). This gives 510.
2256 For SImode: range is 1020, add 4 because pc counts from address of
2257 second instruction after this one, subtract 2 in case pc is 2 byte
2258 aligned, subtract 2 for the jump instruction that we may need to emit
2259 before the table, subtract 2 for the instruction that fills the jump
2260 delay slot. This gives 1018. */
2261
2262 /* The branch will always be shortened now that the reference address for
2263 forward branches is the successor address, thus we need no longer make
2264 adjustments to the [sh]i_limit for -O0. */
2265
2266 si_limit = 1018;
2267 hi_limit = 510;
2268
2269 while (from && count_si < si_limit && count_hi < hi_limit)
2270 {
2271 int inc = get_attr_length (from);
2272 int new_align = 1;
2273
2274 if (GET_CODE (from) == CODE_LABEL)
2275 {
2276 if (optimize)
2277 new_align = 1 << label_to_alignment (from);
2278 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
2279 new_align = 1 << barrier_align (from);
2280 else
2281 new_align = 1;
2282 inc = 0;
2283 }
2284
2285 if (GET_CODE (from) == BARRIER)
2286 {
2287
2288 found_barrier = from;
2289
2290 /* If we are at the end of the function, or in front of an alignment
2291 instruction, we need not insert an extra alignment. We prefer
2292 this kind of barrier. */
2293 if (barrier_align (from) > 2)
2294 good_barrier = from;
2295 }
2296
2297 if (broken_move (from))
2298 {
2299 rtx pat, src, dst;
2300 enum machine_mode mode;
2301
2302 pat = PATTERN (from);
2303 if (GET_CODE (pat) == PARALLEL)
2304 pat = XVECEXP (pat, 0, 0);
2305 src = SET_SRC (pat);
2306 dst = SET_DEST (pat);
2307 mode = GET_MODE (dst);
2308
2309 /* We must explicitly check the mode, because sometimes the
2310 front end will generate code to load unsigned constants into
2311 HImode targets without properly sign extending them. */
2312 if (mode == HImode
2313 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
2314 {
2315 found_hi += 2;
2316 /* We put the short constants before the long constants, so
2317 we must count the length of short constants in the range
2318 for the long constants. */
2319 /* ??? This isn't optimal, but is easy to do. */
2320 si_limit -= 2;
2321 }
2322 else
2323 {
2324 while (si_align > 2 && found_si + si_align - 2 > count_si)
2325 si_align >>= 1;
2326 if (found_si > count_si)
2327 count_si = found_si;
2328 found_si += GET_MODE_SIZE (mode);
2329 if (num_mova)
2330 si_limit -= GET_MODE_SIZE (mode);
2331 }
2332
2333 /* See the code in machine_dependent_reorg, which has a similar if
2334 statement that generates a new mova insn in many cases. */
2335 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
2336 inc += 2;
2337 }
2338
2339 if (mova_p (from))
2340 {
2341 if (! num_mova++)
2342 {
2343 leading_mova = 0;
2344 mova = from;
2345 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
2346 }
2347 if (found_si > count_si)
2348 count_si = found_si;
2349 }
2350 else if (GET_CODE (from) == JUMP_INSN
2351 && (GET_CODE (PATTERN (from)) == ADDR_VEC
2352 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
2353 {
2354 if (num_mova)
2355 num_mova--;
2356 if (barrier_align (next_real_insn (from)) == CACHE_LOG)
2357 {
2358 /* We have just passed the barrier in front of the
2359 ADDR_DIFF_VEC, which is stored in found_barrier. Since
2360 the ADDR_DIFF_VEC is accessed as data, just like our pool
2361 constants, this is a good opportunity to accommodate what
2362 we have gathered so far.
2363 If we waited any longer, we could end up at a barrier in
2364 front of code, which gives worse cache usage for separated
2365 instruction / data caches. */
2366 good_barrier = found_barrier;
2367 break;
2368 }
2369 else
2370 {
2371 rtx body = PATTERN (from);
2372 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
2373 }
2374 }
2375 /* For the SH1, we generate alignments even after jumps-around-jumps. */
2376 else if (GET_CODE (from) == JUMP_INSN
2377 && ! TARGET_SH2
2378 && ! TARGET_SMALLCODE)
2379 new_align = 4;
2380
2381 if (found_si)
2382 {
2383 count_si += inc;
2384 if (new_align > si_align)
2385 {
2386 si_limit -= (count_si - 1) & (new_align - si_align);
2387 si_align = new_align;
2388 }
2389 count_si = (count_si + new_align - 1) & -new_align;
2390 }
2391 if (found_hi)
2392 {
2393 count_hi += inc;
2394 if (new_align > hi_align)
2395 {
2396 hi_limit -= (count_hi - 1) & (new_align - hi_align);
2397 hi_align = new_align;
2398 }
2399 count_hi = (count_hi + new_align - 1) & -new_align;
2400 }
2401 from = NEXT_INSN (from);
2402 }
2403
2404 if (num_mova)
2405 {
2406 if (leading_mova)
2407 {
2408 /* Try as we might, the leading mova is out of range. Change
2409 it into a load (which will become a pcload) and retry. */
2410 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
2411 INSN_CODE (mova) = -1;
2412 return find_barrier (0, 0, mova);
2413 }
2414 else
2415 {
2416 /* Insert the constant pool table before the mova instruction,
2417 to prevent the mova label reference from going out of range. */
2418 from = mova;
2419 good_barrier = found_barrier = barrier_before_mova;
2420 }
2421 }
2422
2423 if (found_barrier)
2424 {
2425 if (good_barrier && next_real_insn (found_barrier))
2426 found_barrier = good_barrier;
2427 }
2428 else
2429 {
2430 /* We didn't find a barrier in time to dump our stuff,
2431 so we'll make one. */
2432 rtx label = gen_label_rtx ();
2433
2434 /* If we exceeded the range, then we must back up over the last
2435 instruction we looked at. Otherwise, we just need to undo the
2436 NEXT_INSN at the end of the loop. */
2437 if (count_hi > hi_limit || count_si > si_limit)
2438 from = PREV_INSN (PREV_INSN (from));
2439 else
2440 from = PREV_INSN (from);
2441
2442 /* Walk back to be just before any jump or label.
2443 Putting it before a label reduces the number of times the branch
2444 around the constant pool table will be hit. Putting it before
2445 a jump makes it more likely that the bra delay slot will be
2446 filled. */
2447 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
2448 || GET_CODE (from) == CODE_LABEL)
2449 from = PREV_INSN (from);
2450
2451 from = emit_jump_insn_after (gen_jump (label), from);
2452 JUMP_LABEL (from) = label;
2453 LABEL_NUSES (label) = 1;
2454 found_barrier = emit_barrier_after (from);
2455 emit_label_after (label, found_barrier);
2456 }
2457
2458 return found_barrier;
2459 }
2460
2461 /* If the instruction INSN is implemented by a special function, and we can
2462 positively find the register that is used to call the sfunc, and this
2463 register is not used anywhere else in this instruction, except as the
2464 destination of a set, return this register; else, return 0. */
2465 rtx
2466 sfunc_uses_reg (insn)
2467 rtx insn;
2468 {
2469 int i;
2470 rtx pattern, part, reg_part, reg;
2471
2472 if (GET_CODE (insn) != INSN)
2473 return 0;
2474 pattern = PATTERN (insn);
2475 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
2476 return 0;
2477
2478 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
2479 {
2480 part = XVECEXP (pattern, 0, i);
2481 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
2482 reg_part = part;
2483 }
2484 if (! reg_part)
2485 return 0;
2486 reg = XEXP (reg_part, 0);
2487 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
2488 {
2489 part = XVECEXP (pattern, 0, i);
2490 if (part == reg_part || GET_CODE (part) == CLOBBER)
2491 continue;
2492 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
2493 && GET_CODE (SET_DEST (part)) == REG)
2494 ? SET_SRC (part) : part)))
2495 return 0;
2496 }
2497 return reg;
2498 }
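
/* Schematic example of the kind of pattern this matches (not an actual
   pattern from sh.md; the register numbers are made up):

	(parallel [(set (reg:SI 0) (udiv:SI (reg:SI 4) (reg:SI 5)))
		   (clobber (reg:SI 17))
		   (use (reg:SI 2))])

   The (use (reg:SI 2)) element identifies r2 as the register holding
   the sfunc address, so sfunc_uses_reg would return (reg:SI 2), as long
   as r2 appears nowhere else in the pattern except as the destination
   of a SET and the insn's type attribute is sfunc.  */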
2499
2500 /* See if the only way in which INSN uses REG is by calling it, or by
2501 setting it while calling it. Set *SET to a SET rtx if the register
2502 is set by INSN. */
2503
2504 static int
2505 noncall_uses_reg (reg, insn, set)
2506 rtx reg;
2507 rtx insn;
2508 rtx *set;
2509 {
2510 rtx pattern, reg2;
2511
2512 *set = NULL_RTX;
2513
2514 reg2 = sfunc_uses_reg (insn);
2515 if (reg2 && REGNO (reg2) == REGNO (reg))
2516 {
2517 pattern = single_set (insn);
2518 if (pattern
2519 && GET_CODE (SET_DEST (pattern)) == REG
2520 && REGNO (reg) == REGNO (SET_DEST (pattern)))
2521 *set = pattern;
2522 return 0;
2523 }
2524 if (GET_CODE (insn) != CALL_INSN)
2525 {
2526 /* We don't use rtx_equal_p because we don't care if the mode is
2527 different. */
2528 pattern = single_set (insn);
2529 if (pattern
2530 && GET_CODE (SET_DEST (pattern)) == REG
2531 && REGNO (reg) == REGNO (SET_DEST (pattern)))
2532 {
2533 rtx par, part;
2534 int i;
2535
2536 *set = pattern;
2537 par = PATTERN (insn);
2538 if (GET_CODE (par) == PARALLEL)
2539 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
2540 {
2541 part = XVECEXP (par, 0, i);
2542 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
2543 return 1;
2544 }
2545 return reg_mentioned_p (reg, SET_SRC (pattern));
2546 }
2547
2548 return 1;
2549 }
2550
2551 pattern = PATTERN (insn);
2552
2553 if (GET_CODE (pattern) == PARALLEL)
2554 {
2555 int i;
2556
2557 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
2558 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
2559 return 1;
2560 pattern = XVECEXP (pattern, 0, 0);
2561 }
2562
2563 if (GET_CODE (pattern) == SET)
2564 {
2565 if (reg_mentioned_p (reg, SET_DEST (pattern)))
2566 {
2567 /* We don't use rtx_equal_p, because we don't care if the
2568 mode is different. */
2569 if (GET_CODE (SET_DEST (pattern)) != REG
2570 || REGNO (reg) != REGNO (SET_DEST (pattern)))
2571 return 1;
2572
2573 *set = pattern;
2574 }
2575
2576 pattern = SET_SRC (pattern);
2577 }
2578
2579 if (GET_CODE (pattern) != CALL
2580 || GET_CODE (XEXP (pattern, 0)) != MEM
2581 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
2582 return 1;
2583
2584 return 0;
2585 }
2586
2587 /* Given X, a pattern of an insn or a part of it, return a mask of used
2588 general registers. Bits 0..15 mean that the respective registers
2589 are used as inputs in the instruction. Bits 16..31 mean that the
2590 registers 0..15, respectively, are used as outputs, or are clobbered.
2591 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
2592 int
2593 regs_used (x, is_dest)
2594 rtx x; int is_dest;
2595 {
2596 enum rtx_code code;
2597 const char *fmt;
2598 int i, used = 0;
2599
2600 if (! x)
2601 return used;
2602 code = GET_CODE (x);
2603 switch (code)
2604 {
2605 case REG:
2606 if (REGNO (x) < 16)
2607 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
2608 << (REGNO (x) + is_dest));
2609 return 0;
2610 case SUBREG:
2611 {
2612 rtx y = SUBREG_REG (x);
2613
2614 if (GET_CODE (y) != REG)
2615 break;
2616 if (REGNO (y) < 16)
2617 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
2618 << (REGNO (y) +
2619 subreg_regno_offset (REGNO (y),
2620 GET_MODE (y),
2621 SUBREG_BYTE (x),
2622 GET_MODE (x)) + is_dest));
2623 return 0;
2624 }
2625 case SET:
2626 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
2627 case RETURN:
2628 /* If there was a return value, it must have been indicated with USE. */
2629 return 0x00ffff00;
2630 case CLOBBER:
2631 is_dest = 1;
2632 break;
2633 case MEM:
2634 is_dest = 0;
2635 break;
2636 case CALL:
2637 used |= 0x00ff00f0;
2638 break;
2639 default:
2640 break;
2641 }
2642
2643 fmt = GET_RTX_FORMAT (code);
2644
2645 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2646 {
2647 if (fmt[i] == 'E')
2648 {
2649 register int j;
2650 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2651 used |= regs_used (XVECEXP (x, i, j), is_dest);
2652 }
2653 else if (fmt[i] == 'e')
2654 used |= regs_used (XEXP (x, i), is_dest);
2655 }
2656 return used;
2657 }
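
/* A worked example (hypothetical register numbers, and assuming
   HARD_REGNO_NREGS is 1 for a general register in SImode):  for
   (set (reg:SI 4) (plus:SI (reg:SI 4) (reg:SI 5))), regs_used (pat, 0)
   returns 0x00100030: bits 4 and 5 because r4 and r5 are read, and
   bit 20 (4 + 16) because r4 is written.  */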
2658
2659 /* Create an instruction that prevents redirection of a conditional branch
2660 to the destination of the JUMP with address ADDR.
2661 If the branch needs to be implemented as an indirect jump, try to find
2662 a scratch register for it.
2663 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
2664 Pass 1 if any preceding insn that doesn't fit into a delay slot is good
2665 enough; pass 2 if a definite blocking insn is needed.
2666 -1 is used internally to avoid deep recursion.
2667 If a blocking instruction is made or recognized, return it. */
2668
2669 static rtx
2670 gen_block_redirect (jump, addr, need_block)
2671 rtx jump;
2672 int addr, need_block;
2673 {
2674 int dead = 0;
2675 rtx prev = prev_nonnote_insn (jump);
2676 rtx dest;
2677
2678 /* First, check if we already have an instruction that satisfies our need. */
2679 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
2680 {
2681 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2682 return prev;
2683 if (GET_CODE (PATTERN (prev)) == USE
2684 || GET_CODE (PATTERN (prev)) == CLOBBER
2685 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
2686 prev = jump;
2687 else if ((need_block &= ~1) < 0)
2688 return prev;
2689 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
2690 need_block = 0;
2691 }
2692 /* We can't use JUMP_LABEL here because it might be undefined
2693 when not optimizing. */
2694 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
2695 /* If the branch is out of range, try to find a scratch register for it. */
2696 if (optimize
2697 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
2698 > 4092 + 4098))
2699 {
2700 rtx scan;
2701 /* Don't look for the stack pointer as a scratch register,
2702 it would cause trouble if an interrupt occurred. */
2703 unsigned try = 0x7fff, used;
2704 int jump_left = flag_expensive_optimizations + 1;
2705
2706 /* It is likely that the most recent eligible instruction is wanted for
2707 the delay slot. Therefore, find out which registers it uses, and
2708 try to avoid using them. */
2709
2710 for (scan = jump; (scan = PREV_INSN (scan)); )
2711 {
2712 enum rtx_code code;
2713
2714 if (INSN_DELETED_P (scan))
2715 continue;
2716 code = GET_CODE (scan);
2717 if (code == CODE_LABEL || code == JUMP_INSN)
2718 break;
2719 if (code == INSN
2720 && GET_CODE (PATTERN (scan)) != USE
2721 && GET_CODE (PATTERN (scan)) != CLOBBER
2722 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
2723 {
2724 try &= ~regs_used (PATTERN (scan), 0);
2725 break;
2726 }
2727 }
2728 for (used = dead = 0, scan = JUMP_LABEL (jump);
2729 (scan = NEXT_INSN (scan)); )
2730 {
2731 enum rtx_code code;
2732
2733 if (INSN_DELETED_P (scan))
2734 continue;
2735 code = GET_CODE (scan);
2736 if (GET_RTX_CLASS (code) == 'i')
2737 {
2738 used |= regs_used (PATTERN (scan), 0);
2739 if (code == CALL_INSN)
2740 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
2741 dead |= (used >> 16) & ~used;
2742 if (dead & try)
2743 {
2744 dead &= try;
2745 break;
2746 }
2747 if (code == JUMP_INSN)
2748 {
2749 if (jump_left-- && simplejump_p (scan))
2750 scan = JUMP_LABEL (scan);
2751 else
2752 break;
2753 }
2754 }
2755 }
2756 /* Mask out the stack pointer again, in case it was
2757 the only 'free' register we have found. */
2758 dead &= 0x7fff;
2759 }
2760 /* If the immediate destination is still in range, check for possible
2761 threading with a jump beyond the delay slot insn.
2762 Don't check if we are called recursively; the jump has been or will be
2763 checked in a different invocation then. */
2764
2765 else if (optimize && need_block >= 0)
2766 {
2767 rtx next = next_active_insn (next_active_insn (dest));
2768 if (next && GET_CODE (next) == JUMP_INSN
2769 && GET_CODE (PATTERN (next)) == SET
2770 && recog_memoized (next) == CODE_FOR_jump)
2771 {
2772 dest = JUMP_LABEL (next);
2773 if (dest
2774 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
2775 > 4092 + 4098))
2776 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
2777 }
2778 }
2779
2780 if (dead)
2781 {
2782 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
2783
2784 /* It would be nice if we could convert the jump into an indirect
2785 jump / far branch right now, thus exposing all constituent
2786 instructions to further optimization. However, reorg uses
2787 simplejump_p to determine if there is an unconditional jump where
2788 it should try to schedule instructions from the target of the
2789 branch; simplejump_p fails for indirect jumps even if they have
2790 a JUMP_LABEL. */
2791 rtx insn = emit_insn_before (gen_indirect_jump_scratch
2792 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
2793 , jump);
2794 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
2795 return insn;
2796 }
2797 else if (need_block)
2798 /* We can't use JUMP_LABEL here because it might be undefined
2799 when not optimizing. */
2800 return emit_insn_before (gen_block_branch_redirect
2801 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
2802 , jump);
2803 return prev;
2804 }
2805
2806 #define CONDJUMP_MIN -252
2807 #define CONDJUMP_MAX 262
2808 struct far_branch
2809 {
2810 /* A label (to be placed) in front of the jump
2811 that jumps to our ultimate destination. */
2812 rtx near_label;
2813 /* Where we are going to insert it if we cannot move the jump any farther,
2814 or the jump itself if we have picked up an existing jump. */
2815 rtx insert_place;
2816 /* The ultimate destination. */
2817 rtx far_label;
2818 struct far_branch *prev;
2819 /* If the branch has already been created, its address;
2820 else the address of its first prospective user. */
2821 int address;
2822 };
2823
2824 static void gen_far_branch PARAMS ((struct far_branch *));
2825 enum mdep_reorg_phase_e mdep_reorg_phase;
2826 static void
2827 gen_far_branch (bp)
2828 struct far_branch *bp;
2829 {
2830 rtx insn = bp->insert_place;
2831 rtx jump;
2832 rtx label = gen_label_rtx ();
2833
2834 emit_label_after (label, insn);
2835 if (bp->far_label)
2836 {
2837 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
2838 LABEL_NUSES (bp->far_label)++;
2839 }
2840 else
2841 jump = emit_jump_insn_after (gen_return (), insn);
2842 /* Emit a barrier so that reorg knows that any following instructions
2843 are not reachable via a fall-through path.
2844 But don't do this when not optimizing, since we wouldn't suppress the
2845 alignment for the barrier then, and could end up with out-of-range
2846 pc-relative loads. */
2847 if (optimize)
2848 emit_barrier_after (jump);
2849 emit_label_after (bp->near_label, insn);
2850 JUMP_LABEL (jump) = bp->far_label;
2851 if (! invert_jump (insn, label, 1))
2852 abort ();
2853 /* Prevent reorg from undoing our splits. */
2854 gen_block_redirect (jump, bp->address += 2, 2);
2855 }
2856
2857 /* Fix up ADDR_DIFF_VECs. */
2858 void
2859 fixup_addr_diff_vecs (first)
2860 rtx first;
2861 {
2862 rtx insn;
2863
2864 for (insn = first; insn; insn = NEXT_INSN (insn))
2865 {
2866 rtx vec_lab, pat, prev, prevpat, x, braf_label;
2867
2868 if (GET_CODE (insn) != JUMP_INSN
2869 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
2870 continue;
2871 pat = PATTERN (insn);
2872 vec_lab = XEXP (XEXP (pat, 0), 0);
2873
2874 /* Search the matching casesi_jump_2. */
2875 for (prev = vec_lab; ; prev = PREV_INSN (prev))
2876 {
2877 if (GET_CODE (prev) != JUMP_INSN)
2878 continue;
2879 prevpat = PATTERN (prev);
2880 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
2881 continue;
2882 x = XVECEXP (prevpat, 0, 1);
2883 if (GET_CODE (x) != USE)
2884 continue;
2885 x = XEXP (x, 0);
2886 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
2887 break;
2888 }
2889
2890 /* Emit the reference label of the braf where it belongs, right after
2891 the casesi_jump_2 (i.e. braf). */
2892 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
2893 emit_label_after (braf_label, prev);
2894
2895 /* Fix up the ADDR_DIFF_VEC to be relative
2896 to the reference address of the braf. */
2897 XEXP (XEXP (pat, 0), 0) = braf_label;
2898 }
2899 }
2900
2901 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
2902 a barrier. Return the base 2 logarithm of the desired alignment. */
2903 int
2904 barrier_align (barrier_or_label)
2905 rtx barrier_or_label;
2906 {
2907 rtx next = next_real_insn (barrier_or_label), pat, prev;
2908 int slot, credit, jump_to_next;
2909
2910 if (! next)
2911 return 0;
2912
2913 pat = PATTERN (next);
2914
2915 if (GET_CODE (pat) == ADDR_DIFF_VEC)
2916 return 2;
2917
2918 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
2919 /* This is a barrier in front of a constant table. */
2920 return 0;
2921
2922 prev = prev_real_insn (barrier_or_label);
2923 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
2924 {
2925 pat = PATTERN (prev);
2926 /* If this is a very small table, we want to keep the alignment after
2927 the table to the minimum for proper code alignment. */
2928 return ((TARGET_SMALLCODE
2929 || (XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
2930 <= (unsigned)1 << (CACHE_LOG - 2)))
2931 ? 1 : CACHE_LOG);
2932 }
2933
2934 if (TARGET_SMALLCODE)
2935 return 0;
2936
2937 if (! TARGET_SH2 || ! optimize)
2938 return CACHE_LOG;
2939
2940 /* When fixing up pcloads, a constant table might be inserted just before
2941 the basic block that ends with the barrier. Thus, we can't trust the
2942 instruction lengths before that. */
2943 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
2944 {
2945 /* Check if there is an immediately preceding branch to the insn beyond
2946 the barrier. We must weigh the cost of discarding useful information
2947 from the current cache line when executing this branch and there is
2948 an alignment, against that of fetching unneeded insns in front of the
2949 branch target when there is no alignment. */
2950
2951 /* There are two delay_slot cases to consider. One is the simple case
2952 where the preceding branch is to the insn beyond the barrier (simple
2953 delay slot filling), and the other is where the preceding branch has
2954 a delay slot that is a duplicate of the insn after the barrier
2955 (fill_eager_delay_slots) and the branch is to the insn after the insn
2956 after the barrier. */
2957
2958 /* PREV is presumed to be the JUMP_INSN for the barrier under
2959 investigation. Skip to the insn before it. */
2960 prev = prev_real_insn (prev);
2961
2962 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
2963 credit >= 0 && prev && GET_CODE (prev) == INSN;
2964 prev = prev_real_insn (prev))
2965 {
2966 jump_to_next = 0;
2967 if (GET_CODE (PATTERN (prev)) == USE
2968 || GET_CODE (PATTERN (prev)) == CLOBBER)
2969 continue;
2970 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
2971 {
2972 prev = XVECEXP (PATTERN (prev), 0, 1);
2973 if (INSN_UID (prev) == INSN_UID (next))
2974 {
2975 /* Delay slot was filled with insn at jump target. */
2976 jump_to_next = 1;
2977 continue;
2978 }
2979 }
2980
2981 if (slot
2982 && get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
2983 slot = 0;
2984 credit -= get_attr_length (prev);
2985 }
2986 if (prev
2987 && GET_CODE (prev) == JUMP_INSN
2988 && JUMP_LABEL (prev))
2989 {
2990 rtx x;
2991 if (jump_to_next
2992 || next_real_insn (JUMP_LABEL (prev)) == next
2993 /* If relax_delay_slots() decides NEXT was redundant
2994 with some previous instruction, it will have
2995 redirected PREV's jump to the following insn. */
2996 || JUMP_LABEL (prev) == next_nonnote_insn (next)
2997 /* There is no upper bound on redundant instructions
2998 that might have been skipped, but we must not put an
2999 alignment where none had been before. */
3000 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3001 (INSN_P (x)
3002 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3003 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch))))
3004 {
3005 rtx pat = PATTERN (prev);
3006 if (GET_CODE (pat) == PARALLEL)
3007 pat = XVECEXP (pat, 0, 0);
3008 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3009 return 0;
3010 }
3011 }
3012 }
3013
3014 return CACHE_LOG;
3015 }
3016
3017 /* If we are inside a phony loop, almost any kind of label can turn up as the
3018 first one in the loop. Aligning a braf label causes incorrect switch
3019 destination addresses; we can detect braf labels because they are
3020 followed by a BARRIER.
3021 Applying loop alignment to small constant or switch tables is a waste
3022 of space, so we suppress this too. */
3023 int
3024 sh_loop_align (label)
3025 rtx label;
3026 {
3027 rtx next = label;
3028
3029 do
3030 next = next_nonnote_insn (next);
3031 while (next && GET_CODE (next) == CODE_LABEL);
3032
3033 if (! next
3034 || ! INSN_P (next)
3035 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3036 || recog_memoized (next) == CODE_FOR_consttable_2)
3037 return 0;
3038 return 2;
3039 }
3040
3041 /* Exported to toplev.c.
3042
3043 Do a final pass over the function, just before delayed branch
3044 scheduling. */
3045
3046 void
3047 machine_dependent_reorg (first)
3048 rtx first;
3049 {
3050 rtx insn, mova;
3051 int num_mova;
3052 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3053 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3054
3055 /* We must split call insns before introducing `mova's. If we're
3056 optimizing, they'll have already been split. Otherwise, make
3057 sure we don't split them too late. */
3058 if (! optimize)
3059 split_all_insns_noflow ();
3060
3061 /* If relaxing, generate pseudo-ops to associate function calls with
3062 the symbols they call. It does no harm to not generate these
3063 pseudo-ops. However, when we can generate them, it enables the
3064 linker to potentially relax the jsr to a bsr, and eliminate the
3065 register load and, possibly, the constant pool entry. */
3066
3067 mdep_reorg_phase = SH_INSERT_USES_LABELS;
3068 if (TARGET_RELAX)
3069 {
3070 /* Remove all REG_LABEL notes. We want to use them for our own
3071 purposes. This works because none of the remaining passes
3072 need to look at them.
3073
3074 ??? But it may break in the future. We should use a machine
3075 dependent REG_NOTE, or some other approach entirely. */
3076 for (insn = first; insn; insn = NEXT_INSN (insn))
3077 {
3078 if (INSN_P (insn))
3079 {
3080 rtx note;
3081
3082 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
3083 remove_note (insn, note);
3084 }
3085 }
3086
3087 for (insn = first; insn; insn = NEXT_INSN (insn))
3088 {
3089 rtx pattern, reg, link, set, scan, dies, label;
3090 int rescan = 0, foundinsn = 0;
3091
3092 if (GET_CODE (insn) == CALL_INSN)
3093 {
3094 pattern = PATTERN (insn);
3095
3096 if (GET_CODE (pattern) == PARALLEL)
3097 pattern = XVECEXP (pattern, 0, 0);
3098 if (GET_CODE (pattern) == SET)
3099 pattern = SET_SRC (pattern);
3100
3101 if (GET_CODE (pattern) != CALL
3102 || GET_CODE (XEXP (pattern, 0)) != MEM)
3103 continue;
3104
3105 reg = XEXP (XEXP (pattern, 0), 0);
3106 }
3107 else
3108 {
3109 reg = sfunc_uses_reg (insn);
3110 if (! reg)
3111 continue;
3112 }
3113
3114 if (GET_CODE (reg) != REG)
3115 continue;
3116
3117 /* This is a function call via REG. If the only uses of REG
3118 between the time that it is set and the time that it dies
3119 are in function calls, then we can associate all the
3120 function calls with the setting of REG. */
3121
3122 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
3123 {
3124 if (REG_NOTE_KIND (link) != 0)
3125 continue;
3126 set = single_set (XEXP (link, 0));
3127 if (set && rtx_equal_p (reg, SET_DEST (set)))
3128 {
3129 link = XEXP (link, 0);
3130 break;
3131 }
3132 }
3133
3134 if (! link)
3135 {
3136 /* ??? Sometimes global register allocation will have
3137 deleted the insn pointed to by LOG_LINKS. Try
3138 scanning backward to find where the register is set. */
3139 for (scan = PREV_INSN (insn);
3140 scan && GET_CODE (scan) != CODE_LABEL;
3141 scan = PREV_INSN (scan))
3142 {
3143 if (! INSN_P (scan))
3144 continue;
3145
3146 if (! reg_mentioned_p (reg, scan))
3147 continue;
3148
3149 if (noncall_uses_reg (reg, scan, &set))
3150 break;
3151
3152 if (set)
3153 {
3154 link = scan;
3155 break;
3156 }
3157 }
3158 }
3159
3160 if (! link)
3161 continue;
3162
3163 /* The register is set at LINK. */
3164
3165 /* We can only optimize the function call if the register is
3166 being set to a symbol. In theory, we could sometimes
3167 optimize calls to a constant location, but the assembler
3168 and linker do not support that at present. */
3169 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
3170 && GET_CODE (SET_SRC (set)) != LABEL_REF)
3171 continue;
3172
3173 /* Scan forward from LINK to the place where REG dies, and
3174 make sure that the only insns which use REG are
3175 themselves function calls. */
3176
3177 /* ??? This doesn't work for call targets that were allocated
3178 by reload, since there may not be a REG_DEAD note for the
3179 register. */
3180
3181 dies = NULL_RTX;
3182 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
3183 {
3184 rtx scanset;
3185
3186 /* Don't try to trace forward past a CODE_LABEL if we haven't
3187 seen INSN yet. Ordinarily, we will only find the setting insn
3188 in LOG_LINKS if it is in the same basic block. However,
3189 cross-jumping can insert code labels in between the load and
3190 the call, and can result in situations where a single call
3191 insn may have two targets depending on where we came from. */
3192
3193 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
3194 break;
3195
3196 if (! INSN_P (scan))
3197 continue;
3198
3199 /* Don't try to trace forward past a JUMP. To optimize
3200 safely, we would have to check that all the
3201 instructions at the jump destination did not use REG. */
3202
3203 if (GET_CODE (scan) == JUMP_INSN)
3204 break;
3205
3206 if (! reg_mentioned_p (reg, scan))
3207 continue;
3208
3209 if (noncall_uses_reg (reg, scan, &scanset))
3210 break;
3211
3212 if (scan == insn)
3213 foundinsn = 1;
3214
3215 if (scan != insn
3216 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
3217 {
3218 /* There is a function call to this register other
3219 than the one we are checking. If we optimize
3220 this call, we need to rescan again below. */
3221 rescan = 1;
3222 }
3223
3224 /* ??? We shouldn't have to worry about SCANSET here.
3225 We should just be able to check for a REG_DEAD note
3226 on a function call. However, the REG_DEAD notes are
3227 apparently not dependable around libcalls; c-torture
3228 execute/920501-2 is a test case. If SCANSET is set,
3229 then this insn sets the register, so it must have
3230 died earlier. Unfortunately, this will only handle
3231 the cases in which the register is, in fact, set in a
3232 later insn. */
3233
3234 /* ??? We shouldn't have to use FOUNDINSN here.
3235 However, the LOG_LINKS fields are apparently not
3236 entirely reliable around libcalls;
3237 newlib/libm/math/e_pow.c is a test case. Sometimes
3238 an insn will appear in LOG_LINKS even though it is
3239 not the most recent insn which sets the register. */
3240
3241 if (foundinsn
3242 && (scanset
3243 || find_reg_note (scan, REG_DEAD, reg)))
3244 {
3245 dies = scan;
3246 break;
3247 }
3248 }
3249
3250 if (! dies)
3251 {
3252 /* Either there was a branch, or some insn used REG
3253 other than as a function call address. */
3254 continue;
3255 }
3256
3257 /* Create a code label, and put it in a REG_LABEL note on
3258 the insn which sets the register, and on each call insn
3259 which uses the register. In final_prescan_insn we look
3260 for the REG_LABEL notes, and output the appropriate label
3261 or pseudo-op. */
3262
3263 label = gen_label_rtx ();
3264 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
3265 REG_NOTES (link));
3266 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
3267 REG_NOTES (insn));
3268 if (rescan)
3269 {
3270 scan = link;
3271 do
3272 {
3273 rtx reg2;
3274
3275 scan = NEXT_INSN (scan);
3276 if (scan != insn
3277 && ((GET_CODE (scan) == CALL_INSN
3278 && reg_mentioned_p (reg, scan))
3279 || ((reg2 = sfunc_uses_reg (scan))
3280 && REGNO (reg2) == REGNO (reg))))
3281 REG_NOTES (scan)
3282 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
3283 }
3284 while (scan != dies);
3285 }
3286 }
3287 }
3288
3289 if (TARGET_SH2)
3290 fixup_addr_diff_vecs (first);
3291
3292 if (optimize)
3293 {
3294 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
3295 shorten_branches (first);
3296 }
3297 /* Scan the function looking for move instructions which have to be
3298 changed to pc-relative loads and insert the literal tables. */
3299
3300 mdep_reorg_phase = SH_FIXUP_PCLOAD;
3301 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
3302 {
3303 if (mova_p (insn))
3304 {
3305 if (! num_mova++)
3306 mova = insn;
3307 }
3308 else if (GET_CODE (insn) == JUMP_INSN
3309 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
3310 && num_mova)
3311 {
3312 rtx scan;
3313 int total;
3314
3315 num_mova--;
3316
3317 /* Some code might have been inserted between the mova and
3318 its ADDR_DIFF_VEC. Check if the mova is still in range. */
3319 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
3320 total += get_attr_length (scan);
3321
3322 /* range of mova is 1020, add 4 because pc counts from address of
3323 second instruction after this one, subtract 2 in case pc is 2
3324 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
3325 cancels out with alignment effects of the mova itself. */
3326 if (total > 1022)
3327 {
3328 /* Change the mova into a load, and restart scanning
3329 there. broken_move will then return true for mova. */
3330 SET_SRC (PATTERN (mova))
3331 = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3332 INSN_CODE (mova) = -1;
3333 insn = mova;
3334 }
3335 }
3336 if (broken_move (insn))
3337 {
3338 rtx scan;
3339 /* Scan ahead looking for a barrier to stick the constant table
3340 behind. */
3341 rtx barrier = find_barrier (num_mova, mova, insn);
3342 rtx last_float_move, last_float = 0, *last_float_addr;
3343
3344 if (num_mova && ! mova_p (mova))
3345 {
3346 /* find_barrier had to change the first mova into a
3347 pcload; thus, we have to start with this new pcload. */
3348 insn = mova;
3349 num_mova = 0;
3350 }
3351 /* Now find all the moves between the points and modify them. */
3352 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
3353 {
3354 if (GET_CODE (scan) == CODE_LABEL)
3355 last_float = 0;
3356 if (broken_move (scan))
3357 {
3358 rtx *patp = &PATTERN (scan), pat = *patp;
3359 rtx src, dst;
3360 rtx lab;
3361 rtx newsrc;
3362 enum machine_mode mode;
3363
3364 if (GET_CODE (pat) == PARALLEL)
3365 patp = &XVECEXP (pat, 0, 0), pat = *patp;
3366 src = SET_SRC (pat);
3367 dst = SET_DEST (pat);
3368 mode = GET_MODE (dst);
3369
3370 if (mode == SImode && hi_const (src)
3371 && REGNO (dst) != FPUL_REG)
3372 {
3373 int offset = 0;
3374
3375 mode = HImode;
3376 while (GET_CODE (dst) == SUBREG)
3377 {
3378 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
3379 GET_MODE (SUBREG_REG (dst)),
3380 SUBREG_BYTE (dst),
3381 GET_MODE (dst));
3382 dst = SUBREG_REG (dst);
3383 }
3384 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
3385 }
3386
3387 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3388 {
3389 /* This must be an insn that clobbers r0. */
3390 rtx clobber = XVECEXP (PATTERN (scan), 0,
3391 XVECLEN (PATTERN (scan), 0) - 1);
3392
3393 if (GET_CODE (clobber) != CLOBBER
3394 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
3395 abort ();
3396
3397 if (last_float
3398 && reg_set_between_p (r0_rtx, last_float_move, scan))
3399 last_float = 0;
3400 lab = add_constant (src, mode, last_float);
3401 if (lab)
3402 emit_insn_before (gen_mova (lab), scan);
3403 else
3404 {
3405 /* There will be a REG_UNUSED note for r0 on
3406 LAST_FLOAT_MOVE; we have to change it to REG_INC,
3407 lest reorg:mark_target_live_regs will not
3408 consider r0 to be used, and we end up with delay
3409 slot insn in front of SCAN that clobbers r0. */
3410 rtx note
3411 = find_regno_note (last_float_move, REG_UNUSED, 0);
3412
3413 /* If we are not optimizing, then there may not be
3414 a note. */
3415 if (note)
3416 PUT_MODE (note, REG_INC);
3417
3418 *last_float_addr = r0_inc_rtx;
3419 }
3420 last_float_move = scan;
3421 last_float = src;
3422 newsrc = gen_rtx (MEM, mode,
3423 (((TARGET_SH4 && ! TARGET_FMOVD)
3424 || REGNO (dst) == FPUL_REG)
3425 ? r0_inc_rtx
3426 : r0_rtx));
3427 last_float_addr = &XEXP (newsrc, 0);
3428
3429 /* Remove the clobber of r0. */
3430 XEXP (clobber, 0) = gen_rtx_SCRATCH (Pmode);
3431 }
3432 /* This is a mova needing a label. Create it. */
3433 else if (GET_CODE (src) == UNSPEC
3434 && XINT (src, 1) == UNSPEC_MOVA
3435 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
3436 {
3437 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
3438 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
3439 newsrc = gen_rtx_UNSPEC (VOIDmode,
3440 gen_rtvec (1, newsrc),
3441 UNSPEC_MOVA);
3442 }
3443 else
3444 {
3445 lab = add_constant (src, mode, 0);
3446 newsrc = gen_rtx_MEM (mode,
3447 gen_rtx_LABEL_REF (VOIDmode, lab));
3448 }
3449 RTX_UNCHANGING_P (newsrc) = 1;
3450 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
3451 INSN_CODE (scan) = -1;
3452 }
3453 }
3454 dump_table (barrier);
3455 insn = barrier;
3456 }
3457 }
3458
3459 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
3460 INSN_ADDRESSES_FREE ();
3461 split_branches (first);
3462
3463 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
3464 also has an effect on the register that holds the address of the sfunc.
3465 Insert an extra dummy insn in front of each sfunc that pretends to
3466 use this register. */
3467 if (flag_delayed_branch)
3468 {
3469 for (insn = first; insn; insn = NEXT_INSN (insn))
3470 {
3471 rtx reg = sfunc_uses_reg (insn);
3472
3473 if (! reg)
3474 continue;
3475 emit_insn_before (gen_use_sfunc_addr (reg), insn);
3476 }
3477 }
3478 #if 0
3479 /* fpscr is not actually a user variable, but we pretend it is for the
3480 sake of the previous optimization passes, since we want it handled like
3481 one. However, we don't have any debugging information for it, so turn
3482 it into a non-user variable now. */
3483 if (TARGET_SH4)
3484 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
3485 #endif
3486 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
3487 }
3488
3489 int
3490 get_dest_uid (label, max_uid)
3491 rtx label;
3492 int max_uid;
3493 {
3494 rtx dest = next_real_insn (label);
3495 int dest_uid;
3496 if (! dest)
3497 /* This can happen for an undefined label. */
3498 return 0;
3499 dest_uid = INSN_UID (dest);
3500 /* If this is a newly created branch redirection blocking instruction,
3501 we cannot index the branch_uid or insn_addresses arrays with its
3502 uid. But then, we won't need to, because the actual destination is
3503 the following branch. */
3504 while (dest_uid >= max_uid)
3505 {
3506 dest = NEXT_INSN (dest);
3507 dest_uid = INSN_UID (dest);
3508 }
3509 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
3510 return 0;
3511 return dest_uid;
3512 }
3513
3514 /* Split condbranches that are out of range. Also add clobbers for
3515 scratch registers that are needed in far jumps.
3516 We do this before delay slot scheduling, so that it can take our
3517 newly created instructions into account. It also allows us to
3518 find branches with common targets more easily. */
3519
3520 static void
3521 split_branches (first)
3522 rtx first;
3523 {
3524 rtx insn;
3525 struct far_branch **uid_branch, *far_branch_list = 0;
3526 int max_uid = get_max_uid ();
3527
3528 /* Find out which branches are out of range. */
3529 shorten_branches (first);
3530
3531 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
3532 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
3533
3534 for (insn = first; insn; insn = NEXT_INSN (insn))
3535 if (! INSN_P (insn))
3536 continue;
3537 else if (INSN_DELETED_P (insn))
3538 {
3539 /* Shorten_branches would split this instruction again,
3540 so transform it into a note. */
3541 PUT_CODE (insn, NOTE);
3542 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3543 NOTE_SOURCE_FILE (insn) = 0;
3544 }
3545 else if (GET_CODE (insn) == JUMP_INSN
3546 /* Don't mess with ADDR_DIFF_VEC */
3547 && (GET_CODE (PATTERN (insn)) == SET
3548 || GET_CODE (PATTERN (insn)) == RETURN))
3549 {
3550 enum attr_type type = get_attr_type (insn);
3551 if (type == TYPE_CBRANCH)
3552 {
3553 rtx next, beyond;
3554
3555 if (get_attr_length (insn) > 4)
3556 {
3557 rtx src = SET_SRC (PATTERN (insn));
3558 rtx olabel = XEXP (XEXP (src, 1), 0);
3559 int addr = INSN_ADDRESSES (INSN_UID (insn));
3560 rtx label = 0;
3561 int dest_uid = get_dest_uid (olabel, max_uid);
3562 struct far_branch *bp = uid_branch[dest_uid];
3563
3564 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
3565 the label if the LABEL_NUSES count drops to zero. There is
3566 always a jump_optimize pass that sets these values, but it
3567 proceeds to delete unreferenced code, and then if not
3568 optimizing, to un-delete the deleted instructions, thus
3569 leaving labels with use counts that are too low. */
3570 if (! optimize)
3571 {
3572 JUMP_LABEL (insn) = olabel;
3573 LABEL_NUSES (olabel)++;
3574 }
3575 if (! bp)
3576 {
3577 bp = (struct far_branch *) alloca (sizeof *bp);
3578 uid_branch[dest_uid] = bp;
3579 bp->prev = far_branch_list;
3580 far_branch_list = bp;
3581 bp->far_label
3582 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
3583 LABEL_NUSES (bp->far_label)++;
3584 }
3585 else
3586 {
3587 label = bp->near_label;
3588 if (! label && bp->address - addr >= CONDJUMP_MIN)
3589 {
3590 rtx block = bp->insert_place;
3591
3592 if (GET_CODE (PATTERN (block)) == RETURN)
3593 block = PREV_INSN (block);
3594 else
3595 block = gen_block_redirect (block,
3596 bp->address, 2);
3597 label = emit_label_after (gen_label_rtx (),
3598 PREV_INSN (block));
3599 bp->near_label = label;
3600 }
3601 else if (label && ! NEXT_INSN (label))
3602 {
3603 if (addr + 2 - bp->address <= CONDJUMP_MAX)
3604 bp->insert_place = insn;
3605 else
3606 gen_far_branch (bp);
3607 }
3608 }
3609 if (! label
3610 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
3611 {
3612 bp->near_label = label = gen_label_rtx ();
3613 bp->insert_place = insn;
3614 bp->address = addr;
3615 }
3616 if (! redirect_jump (insn, label, 1))
3617 abort ();
3618 }
3619 else
3620 {
3621 /* get_attr_length (insn) == 2 */
3622 /* Check if we have a pattern where reorg wants to redirect
3623 the branch to a label from an unconditional branch that
3624 is too far away. */
3625 /* We can't use JUMP_LABEL here because it might be undefined
3626 when not optimizing. */
3627 /* A syntax error might cause beyond to be NULL_RTX. */
3628 beyond
3629 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
3630 0));
3631
3632 if (beyond
3633 && (GET_CODE (beyond) == JUMP_INSN
3634 || ((beyond = next_active_insn (beyond))
3635 && GET_CODE (beyond) == JUMP_INSN))
3636 && GET_CODE (PATTERN (beyond)) == SET
3637 && recog_memoized (beyond) == CODE_FOR_jump
3638 && ((INSN_ADDRESSES
3639 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
3640 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
3641 > 252 + 258 + 2))
3642 gen_block_redirect (beyond,
3643 INSN_ADDRESSES (INSN_UID (beyond)), 1);
3644 }
3645
3646 next = next_active_insn (insn);
3647
3648 if ((GET_CODE (next) == JUMP_INSN
3649 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
3650 && GET_CODE (PATTERN (next)) == SET
3651 && recog_memoized (next) == CODE_FOR_jump
3652 && ((INSN_ADDRESSES
3653 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
3654 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
3655 > 252 + 258 + 2))
3656 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
3657 }
3658 else if (type == TYPE_JUMP || type == TYPE_RETURN)
3659 {
3660 int addr = INSN_ADDRESSES (INSN_UID (insn));
3661 rtx far_label = 0;
3662 int dest_uid = 0;
3663 struct far_branch *bp;
3664
3665 if (type == TYPE_JUMP)
3666 {
3667 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
3668 dest_uid = get_dest_uid (far_label, max_uid);
3669 if (! dest_uid)
3670 {
3671 /* Parse errors can lead to labels outside
3672 the insn stream. */
3673 if (! NEXT_INSN (far_label))
3674 continue;
3675
3676 if (! optimize)
3677 {
3678 JUMP_LABEL (insn) = far_label;
3679 LABEL_NUSES (far_label)++;
3680 }
3681 redirect_jump (insn, NULL_RTX, 1);
3682 far_label = 0;
3683 }
3684 }
3685 bp = uid_branch[dest_uid];
3686 if (! bp)
3687 {
3688 bp = (struct far_branch *) alloca (sizeof *bp);
3689 uid_branch[dest_uid] = bp;
3690 bp->prev = far_branch_list;
3691 far_branch_list = bp;
3692 bp->near_label = 0;
3693 bp->far_label = far_label;
3694 if (far_label)
3695 LABEL_NUSES (far_label)++;
3696 }
3697 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
3698 if (addr - bp->address <= CONDJUMP_MAX)
3699 emit_label_after (bp->near_label, PREV_INSN (insn));
3700 else
3701 {
3702 gen_far_branch (bp);
3703 bp->near_label = 0;
3704 }
3705 else
3706 bp->near_label = 0;
3707 bp->address = addr;
3708 bp->insert_place = insn;
3709 if (! far_label)
3710 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
3711 else
3712 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
3713 }
3714 }
3715 /* Generate all pending far branches,
3716 and free our references to the far labels. */
3717 while (far_branch_list)
3718 {
3719 if (far_branch_list->near_label
3720 && ! NEXT_INSN (far_branch_list->near_label))
3721 gen_far_branch (far_branch_list);
3722 if (optimize
3723 && far_branch_list->far_label
3724 && ! --LABEL_NUSES (far_branch_list->far_label))
3725 delete_insn (far_branch_list->far_label);
3726 far_branch_list = far_branch_list->prev;
3727 }
3728
3729 /* Instruction length information is no longer valid due to the new
3730 instructions that have been generated. */
3731 init_insn_lengths ();
3732 }
3733
3734 /* Dump out instruction addresses, which is useful for debugging the
3735 constant pool table stuff.
3736
3737 If relaxing, output the label and pseudo-ops used to link together
3738 calls and the instruction which set the registers. */
3739
3740 /* ??? This is unnecessary, and probably should be deleted. This makes
3741 the insn_addresses declaration above unnecessary. */
3742
3743 /* ??? The addresses printed by this routine for insns are nonsense for
3744 insns which are inside of a sequence where none of the inner insns have
3745 variable length. This is because the second pass of shorten_branches
3746 does not bother to update them. */
3747
3748 void
3749 final_prescan_insn (insn, opvec, noperands)
3750 rtx insn;
3751 rtx *opvec ATTRIBUTE_UNUSED;
3752 int noperands ATTRIBUTE_UNUSED;
3753 {
3754 if (TARGET_DUMPISIZE)
3755 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
3756
3757 if (TARGET_RELAX)
3758 {
3759 rtx note;
3760
3761 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
3762 if (note)
3763 {
3764 rtx pattern;
3765
3766 pattern = PATTERN (insn);
3767 if (GET_CODE (pattern) == PARALLEL)
3768 pattern = XVECEXP (pattern, 0, 0);
3769 if (GET_CODE (pattern) == CALL
3770 || (GET_CODE (pattern) == SET
3771 && (GET_CODE (SET_SRC (pattern)) == CALL
3772 || get_attr_type (insn) == TYPE_SFUNC)))
3773 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
3774 CODE_LABEL_NUMBER (XEXP (note, 0)));
3775 else if (GET_CODE (pattern) == SET)
3776 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3777 CODE_LABEL_NUMBER (XEXP (note, 0)));
3778 else
3779 abort ();
3780 }
3781 }
3782 }
3783
3784 /* Dump out any constants accumulated in the final pass. These will
3785 only be labels. */
3786
3787 const char *
3788 output_jump_label_table ()
3789 {
3790 int i;
3791
3792 if (pool_size)
3793 {
3794 fprintf (asm_out_file, "\t.align 2\n");
3795 for (i = 0; i < pool_size; i++)
3796 {
3797 pool_node *p = &pool_vector[i];
3798
3799 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3800 CODE_LABEL_NUMBER (p->label));
3801 output_asm_insn (".long %O0", &p->value);
3802 }
3803 pool_size = 0;
3804 }
3805
3806 return "";
3807 }
3808 \f
3809 /* A full frame looks like:
3810
3811 arg-5
3812 arg-4
3813 [ if current_function_anonymous_args
3814 arg-3
3815 arg-2
3816 arg-1
3817 arg-0 ]
3818 saved-fp
3819 saved-r10
3820 saved-r11
3821 saved-r12
3822 saved-pr
3823 local-n
3824 ..
3825 local-1
3826 local-0 <- fp points here. */
3827
3828 /* Number of bytes pushed for anonymous args, used to pass information
3829 between expand_prologue and expand_epilogue. */
3830
3831 static int extra_push;
3832
3833 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register
3834 to be adjusted, and TEMP, if nonnegative, holds the register number
3835 of a general register that we may clobber. */
3836
3837 static void
3838 output_stack_adjust (size, reg, temp)
3839 int size;
3840 rtx reg;
3841 int temp;
3842 {
3843 if (size)
3844 {
3845 if (CONST_OK_FOR_I (size))
3846 emit_insn (gen_addsi3 (reg, reg, GEN_INT (size)));
3847 /* Try to do it with two partial adjustments; however, we must make
3848 sure that the stack is properly aligned at all times, in case
3849 an interrupt occurs between the two partial adjustments. */
3850 else if (CONST_OK_FOR_I (size / 2 & -4)
3851 && CONST_OK_FOR_I (size - (size / 2 & -4)))
3852 {
3853 emit_insn (gen_addsi3 (reg, reg, GEN_INT (size / 2 & -4)));
3854 emit_insn (gen_addsi3 (reg, reg, GEN_INT (size - (size / 2 & -4))));
3855 }
3856 else
3857 {
3858 rtx const_reg;
3859
3860 /* If TEMP is invalid, we could temporarily save a general
3861 register to MACL. However, there is currently no need
3862 to handle this case, so just abort when we see it. */
3863 if (temp < 0)
3864 abort ();
3865 const_reg = gen_rtx_REG (SImode, temp);
3866
3867 /* If SIZE is negative, subtract the positive value.
3868 This sometimes allows a constant pool entry to be shared
3869 between prologue and epilogue code. */
3870 if (size < 0)
3871 {
3872 emit_insn (gen_movsi (const_reg, GEN_INT (-size)));
3873 emit_insn (gen_subsi3 (reg, reg, const_reg));
3874 }
3875 else
3876 {
3877 emit_insn (gen_movsi (const_reg, GEN_INT (size)));
3878 emit_insn (gen_addsi3 (reg, reg, const_reg));
3879 }
3880 }
3881 }
3882 }
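
/* As a worked illustration of the split above (assuming CONST_OK_FOR_I
   accepts the usual signed 8-bit add immediates): a call like

       output_stack_adjust (-184, stack_pointer_rtx, 1);

   cannot be done with a single add, so it is emitted as two adjustments
   of size / 2 & -4 == -92 and size - (-92) == -92, i.e. two
   "add #-92,r15" insns, keeping the stack properly aligned in between.  */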
3883
3884 /* Output RTL to push register RN onto the stack. */
3885
3886 static void
3887 push (rn)
3888 int rn;
3889 {
3890 rtx x;
3891 if (rn == FPUL_REG)
3892 x = gen_push_fpul ();
3893 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
3894 && FP_OR_XD_REGISTER_P (rn))
3895 {
3896 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
3897 return;
3898 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
3899 }
3900 else if (TARGET_SH3E && FP_REGISTER_P (rn))
3901 x = gen_push_e (gen_rtx_REG (SFmode, rn));
3902 else
3903 x = gen_push (gen_rtx_REG (SImode, rn));
3904
3905 x = emit_insn (x);
3906 REG_NOTES (x)
3907 = gen_rtx_EXPR_LIST (REG_INC,
3908 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
3909 }
3910
3911 /* Output RTL to pop register RN from the stack. */
3912
3913 static void
3914 pop (rn)
3915 int rn;
3916 {
3917 rtx x;
3918 if (rn == FPUL_REG)
3919 x = gen_pop_fpul ();
3920 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
3921 && FP_OR_XD_REGISTER_P (rn))
3922 {
3923 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
3924 return;
3925 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
3926 }
3927 else if (TARGET_SH3E && FP_REGISTER_P (rn))
3928 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
3929 else
3930 x = gen_pop (gen_rtx_REG (SImode, rn));
3931
3932 x = emit_insn (x);
3933 REG_NOTES (x)
3934 = gen_rtx_EXPR_LIST (REG_INC,
3935 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
3936 }
3937
3938 /* Generate code to push the regs specified in the mask. */
3939
3940 static void
3941 push_regs (mask, mask2)
3942 int mask, mask2;
3943 {
3944 int i;
3945
3946 /* Push PR last; this gives better latencies after the prologue, and
3947 candidates for the return delay slot when there are no general
3948 registers pushed. */
3949 for (i = 0; i < 32; i++)
3950 if (mask & (1 << i) && i != PR_REG)
3951 push (i);
3952 for (i = 32; i < FIRST_PSEUDO_REGISTER; i++)
3953 if (mask2 & (1 << (i - 32)))
3954 push (i);
3955 if (mask & (1 << PR_REG))
3956 push (PR_REG);
3957 }
3958
3959 /* Work out the registers which need to be saved, both as a mask and a
3960 count of saved words.
3961
3962 If doing a pragma interrupt function, then push all regs used by the
3963 function, and if we call another function (we can tell by looking at PR),
3964 make sure that all the regs it clobbers are safe too. */
3965
3966 static int
3967 calc_live_regs (count_ptr, live_regs_mask2)
3968 int *count_ptr;
3969 int *live_regs_mask2;
3970 {
3971 int reg;
3972 int live_regs_mask = 0;
3973 int count;
3974 int interrupt_handler;
3975 rtx pr_initial;
3976 int pr_live;
3977
3978 if ((lookup_attribute
3979 ("interrupt_handler",
3980 DECL_ATTRIBUTES (current_function_decl)))
3981 != NULL_TREE)
3982 interrupt_handler = 1;
3983 else
3984 interrupt_handler = 0;
3985
3986 *live_regs_mask2 = 0;
3987 /* If switching to double mode lets us avoid a lot of saves, do that. */
3988 if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
3989 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
3990 if (regs_ever_live[reg] && regs_ever_live[reg+1]
3991 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
3992 && ++count > 2)
3993 {
3994 target_flags &= ~FPU_SINGLE_BIT;
3995 break;
3996 }
3997 pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
3998 pr_live = (pr_initial
3999 ? REGNO (pr_initial) != PR_REG
4000 : regs_ever_live[PR_REG]);
4001 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
4002 {
4003 if (reg == PR_REG
4004 ? pr_live
4005 : (interrupt_handler && ! pragma_trapa)
4006 ? (/* Need to save all the regs ever live. */
4007 (regs_ever_live[reg]
4008 || (call_used_regs[reg]
4009 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
4010 && pr_live))
4011 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
4012 && reg != RETURN_ADDRESS_POINTER_REGNUM
4013 && reg != T_REG && reg != GBR_REG && reg != FPSCR_REG)
4014 : (/* Only push those regs which are used and need to be saved. */
4015 regs_ever_live[reg] && ! call_used_regs[reg]))
4016 {
4017 if (reg >= 32)
4018 *live_regs_mask2 |= 1 << (reg - 32);
4019 else
4020 live_regs_mask |= 1 << reg;
4021 count++;
4022 if (TARGET_SH4 && TARGET_FMOVD && FP_OR_XD_REGISTER_P (reg))
4023 {
4024 if (FP_REGISTER_P (reg))
4025 {
4026 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
4027 {
4028 if (reg >= 32)
4029 *live_regs_mask2 |= 1 << ((reg ^ 1) - 32);
4030 else
4031 live_regs_mask |= 1 << (reg ^ 1);
4032 count++;
4033 }
4034 }
4035 else /* if (XD_REGISTER_P (reg)) */
4036 {
4037 /* Must switch to double mode to access these registers. */
4038 target_flags &= ~FPU_SINGLE_BIT;
4039 count++;
4040 }
4041 }
4042 }
4043 }
4044
4045 *count_ptr = count * UNITS_PER_WORD;
4046 return live_regs_mask;
4047 }
4048
4049 /* Code to generate prologue and epilogue sequences */
4050
4051 /* PUSHED is the number of bytes that are being pushed on the
4052 stack for register saves. Return the frame size, padded
4053 appropriately so that the stack stays properly aligned. */
4054 static HOST_WIDE_INT
4055 rounded_frame_size (pushed)
4056 int pushed;
4057 {
4058 HOST_WIDE_INT size = get_frame_size ();
4059 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4060
4061 return ((size + pushed + align - 1) & -align) - pushed;
4062 }
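
/* For example, with a 4-byte stack boundary, get_frame_size () == 10 and
   pushed == 20, this returns ((10 + 20 + 3) & -4) - 20 == 12: the ten
   bytes of locals are padded to twelve so that the total of register
   saves plus frame stays aligned.  */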
4063
4064 void
4065 sh_expand_prologue ()
4066 {
4067 int live_regs_mask;
4068 int d, i;
4069 int live_regs_mask2;
4070 int save_flags = target_flags;
4071
4072 current_function_interrupt
4073 = lookup_attribute ("interrupt_handler",
4074 DECL_ATTRIBUTES (current_function_decl))
4075 != NULL_TREE;
4076
4077 /* We have pretend args if we had an object sent partially in registers
4078 and partially on the stack, e.g. a large structure. */
4079 output_stack_adjust (-current_function_pretend_args_size,
4080 stack_pointer_rtx, 1);
4081
4082 extra_push = 0;
4083
4084 /* This is set by SETUP_INCOMING_VARARGS to indicate that this is a varargs
4085 routine. Clear it here so that the next function isn't affected. */
4086 if (current_function_anonymous_args)
4087 {
4088 current_function_anonymous_args = 0;
4089
4090 /* This is not used by the SH3E calling convention.  */
4091 if (! TARGET_SH3E && ! TARGET_HITACHI)
4092 {
4093 /* Push arg regs as if they'd been provided by caller in stack. */
4094 for (i = 0; i < NPARM_REGS(SImode); i++)
4095 {
4096 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
4097 if (i >= (NPARM_REGS(SImode)
4098 - current_function_args_info.arg_count[(int) SH_ARG_INT]
4099 ))
4100 break;
4101 push (rn);
4102 extra_push += 4;
4103 }
4104 }
4105 }
4106
4107 /* If we're supposed to switch stacks at function entry, do so now. */
4108 if (sp_switch)
4109 emit_insn (gen_sp_switch_1 ());
4110
4111 live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
4112 /* ??? Maybe we could save some switching if we can move a mode switch
4113 that already happens to be at the function start into the prologue. */
4114 if (target_flags != save_flags)
4115 emit_insn (gen_toggle_sz ());
4116
4117 push_regs (live_regs_mask, live_regs_mask2);
4118
4119 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
4120 {
4121 rtx insn = get_last_insn ();
4122 rtx last = emit_insn (gen_GOTaddr2picreg ());
4123
4124 /* Mark these insns as possibly dead. Sometimes, flow2 may
4125 delete all uses of the PIC register. In this case, let it
4126 delete the initialization too. */
4127 do
4128 {
4129 insn = NEXT_INSN (insn);
4130
4131 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
4132 const0_rtx,
4133 REG_NOTES (insn));
4134 }
4135 while (insn != last);
4136 }
4137
4138 if (target_flags != save_flags)
4139 {
4140 rtx insn = emit_insn (gen_toggle_sz ());
4141
4142 /* If we're lucky, a mode switch in the function body will
4143 overwrite fpscr, turning this insn dead. Tell flow this
4144 insn is ok to delete. */
4145 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
4146 const0_rtx,
4147 REG_NOTES (insn));
4148 }
4149
4150 target_flags = save_flags;
4151
4152 output_stack_adjust (-rounded_frame_size (d),
4153 stack_pointer_rtx, 1);
4154
4155 if (frame_pointer_needed)
4156 emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx));
4157 }
4158
4159 void
4160 sh_expand_epilogue ()
4161 {
4162 int live_regs_mask;
4163 int d, i;
4164
4165 int live_regs_mask2;
4166 int save_flags = target_flags;
4167 int frame_size;
4168
4169 live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
4170
4171 frame_size = rounded_frame_size (d);
4172
4173 if (frame_pointer_needed)
4174 {
4175 output_stack_adjust (frame_size, frame_pointer_rtx, 7);
4176
4177 /* We must avoid moving the stack pointer adjustment past code
4178 which reads from the local frame, else an interrupt could
4179 occur after the SP adjustment and clobber data in the local
4180 frame. */
4181 emit_insn (gen_blockage ());
4182 emit_insn (gen_movsi (stack_pointer_rtx, frame_pointer_rtx));
4183 }
4184 else if (frame_size)
4185 {
4186 /* We must avoid moving the stack pointer adjustment past code
4187 which reads from the local frame, else an interrupt could
4188 occur after the SP adjustment and clobber data in the local
4189 frame. */
4190 emit_insn (gen_blockage ());
4191 output_stack_adjust (frame_size, stack_pointer_rtx, 7);
4192 }
4193
4194 /* Pop all the registers. */
4195
4196 if (target_flags != save_flags)
4197 emit_insn (gen_toggle_sz ());
4198 if (live_regs_mask & (1 << PR_REG))
4199 pop (PR_REG);
4200 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4201 {
4202 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
4203 if (j < 32 && (live_regs_mask & (1 << j)) && j != PR_REG)
4204 pop (j);
4205 else if (j >= 32 && (live_regs_mask2 & (1 << (j - 32))))
4206 pop (j);
4207 }
4208 if (target_flags != save_flags)
4209 emit_insn (gen_toggle_sz ());
4210 target_flags = save_flags;
4211
4212 output_stack_adjust (extra_push + current_function_pretend_args_size,
4213 stack_pointer_rtx, 7);
4214
4215 /* Switch back to the normal stack if necessary. */
4216 if (sp_switch)
4217 emit_insn (gen_sp_switch_2 ());
4218
4219 /* Tell flow the insn that pops PR isn't dead. */
4220 if (live_regs_mask & (1 << PR_REG))
4221 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
4222 }
4223
4224 static int sh_need_epilogue_known = 0;
4225
4226 int
4227 sh_need_epilogue ()
4228 {
4229 if (! sh_need_epilogue_known)
4230 {
4231 rtx epilogue;
4232
4233 start_sequence ();
4234 sh_expand_epilogue ();
4235 epilogue = gen_sequence ();
4236 end_sequence ();
4237 sh_need_epilogue_known
4238 = (GET_CODE (epilogue) == SEQUENCE && XVECLEN (epilogue, 0) == 0
4239 ? -1 : 1);
4240 }
4241 return sh_need_epilogue_known > 0;
4242 }
4243
4244 /* Clear variables at function end. */
4245
4246 static void
4247 sh_output_function_epilogue (file, size)
4248 FILE *file ATTRIBUTE_UNUSED;
4249 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4250 {
4251 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
4252 sh_need_epilogue_known = 0;
4253 sp_switch = NULL_RTX;
4254 }
4255
4256 rtx
4257 sh_builtin_saveregs ()
4258 {
4259 /* First unnamed integer register. */
4260 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
4261 /* Number of integer registers we need to save. */
4262 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
4263 /* First unnamed SFmode float reg */
4264 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
4265 /* Number of SFmode float regs to save. */
4266 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
4267 rtx regbuf, fpregs;
4268 int bufsize, regno;
4269 HOST_WIDE_INT alias_set;
4270
4271 /* Allocate block of memory for the regs. */
4272 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
4273 Or can assign_stack_local accept a 0 SIZE argument? */
4274 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
4275
4276 regbuf = assign_stack_local (BLKmode, bufsize, 0);
4277 alias_set = get_varargs_alias_set ();
4278 set_mem_alias_set (regbuf, alias_set);
4279
4280 /* Save int args.
4281 This is optimized to only save the regs that are necessary. Explicitly
4282 named args need not be saved. */
4283 if (n_intregs > 0)
4284 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
4285 adjust_address (regbuf, BLKmode,
4286 n_floatregs * UNITS_PER_WORD),
4287 n_intregs, n_intregs * UNITS_PER_WORD);
4288
4289 /* Save float args.
4290 This is optimized to only save the regs that are necessary. Explicitly
4291 named args need not be saved.
4292 We explicitly build a pointer to the buffer because it halves the insn
4293 count when not optimizing (otherwise the pointer is built for each reg
4294 saved).
4295 We emit the moves in reverse order so that we can use predecrement. */
4296
4297 fpregs = gen_reg_rtx (Pmode);
4298 emit_move_insn (fpregs, XEXP (regbuf, 0));
4299 emit_insn (gen_addsi3 (fpregs, fpregs,
4300 GEN_INT (n_floatregs * UNITS_PER_WORD)));
4301 if (TARGET_SH4)
4302 {
4303 rtx mem;
4304 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
4305 {
4306 emit_insn (gen_addsi3 (fpregs, fpregs,
4307 GEN_INT (-2 * UNITS_PER_WORD)));
4308 mem = gen_rtx_MEM (DFmode, fpregs);
4309 set_mem_alias_set (mem, alias_set);
4310 emit_move_insn (mem,
4311 gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno));
4312 }
4313 regno = first_floatreg;
4314 if (regno & 1)
4315 {
4316 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
4317 mem = gen_rtx_MEM (SFmode, fpregs);
4318 set_mem_alias_set (mem, alias_set);
4319 emit_move_insn (mem,
4320 gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno
4321 - (TARGET_LITTLE_ENDIAN != 0)));
4322 }
4323 }
4324 else
4325 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
4326 {
4327 rtx mem;
4328
4329 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
4330 mem = gen_rtx_MEM (SFmode, fpregs);
4331 set_mem_alias_set (mem, alias_set);
4332 emit_move_insn (mem,
4333 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
4334 }
4335
4336 /* Return the address of the regbuf. */
4337 return XEXP (regbuf, 0);
4338 }
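
/* The save area built above places the unnamed float registers first and
   the unnamed integer registers after them; e.g. with n_floatregs == 2,
   n_intregs == 3 and 4-byte words (an illustrative case) the layout is

       offset  0 .. 7  : two float save slots, filled by predecrementing
                         fpregs from offset 8
       offset  8 .. 19 : three integer save slots (move_block_from_reg)

   and the address of offset 0 is what gets returned.  */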
4339
4340 /* Define the `__builtin_va_list' type for the ABI. */
4341
4342 tree
4343 sh_build_va_list ()
4344 {
4345 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
4346 tree record;
4347
4348 if ((! TARGET_SH3E && ! TARGET_SH4) || TARGET_HITACHI)
4349 return ptr_type_node;
4350
4351 record = make_node (RECORD_TYPE);
4352
4353 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
4354 ptr_type_node);
4355 f_next_o_limit = build_decl (FIELD_DECL,
4356 get_identifier ("__va_next_o_limit"),
4357 ptr_type_node);
4358 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
4359 ptr_type_node);
4360 f_next_fp_limit = build_decl (FIELD_DECL,
4361 get_identifier ("__va_next_fp_limit"),
4362 ptr_type_node);
4363 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
4364 ptr_type_node);
4365
4366 DECL_FIELD_CONTEXT (f_next_o) = record;
4367 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
4368 DECL_FIELD_CONTEXT (f_next_fp) = record;
4369 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
4370 DECL_FIELD_CONTEXT (f_next_stack) = record;
4371
4372 TYPE_FIELDS (record) = f_next_o;
4373 TREE_CHAIN (f_next_o) = f_next_o_limit;
4374 TREE_CHAIN (f_next_o_limit) = f_next_fp;
4375 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
4376 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
4377
4378 layout_type (record);
4379
4380 return record;
4381 }
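
/* The record built above corresponds roughly to the following C type
   (a sketch; the field names are the FIELD_DECL identifiers used above):

       typedef struct {
         void *__va_next_o;
         void *__va_next_o_limit;
         void *__va_next_fp;
         void *__va_next_fp_limit;
         void *__va_next_stack;
       } __builtin_va_list;  */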
4382
4383 /* Implement `va_start' for varargs and stdarg. */
4384
4385 void
4386 sh_va_start (stdarg_p, valist, nextarg)
4387 int stdarg_p;
4388 tree valist;
4389 rtx nextarg;
4390 {
4391 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
4392 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
4393 tree t, u;
4394 int nfp, nint;
4395
4396 if ((! TARGET_SH3E && ! TARGET_SH4) || TARGET_HITACHI)
4397 {
4398 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
4399 return;
4400 }
4401
4402 f_next_o = TYPE_FIELDS (va_list_type_node);
4403 f_next_o_limit = TREE_CHAIN (f_next_o);
4404 f_next_fp = TREE_CHAIN (f_next_o_limit);
4405 f_next_fp_limit = TREE_CHAIN (f_next_fp);
4406 f_next_stack = TREE_CHAIN (f_next_fp_limit);
4407
4408 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
4409 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
4410 valist, f_next_o_limit);
4411 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp);
4412 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
4413 valist, f_next_fp_limit);
4414 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
4415 valist, f_next_stack);
4416
4417 /* Call __builtin_saveregs. */
4418 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
4419 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
4420 TREE_SIDE_EFFECTS (t) = 1;
4421 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4422
4423 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
4424 if (nfp < 8)
4425 nfp = 8 - nfp;
4426 else
4427 nfp = 0;
4428 u = fold (build (PLUS_EXPR, ptr_type_node, u,
4429 build_int_2 (UNITS_PER_WORD * nfp, 0)));
4430 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
4431 TREE_SIDE_EFFECTS (t) = 1;
4432 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4433
4434 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
4435 TREE_SIDE_EFFECTS (t) = 1;
4436 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4437
4438 nint = current_function_args_info.arg_count[SH_ARG_INT];
4439 if (nint < 4)
4440 nint = 4 - nint;
4441 else
4442 nint = 0;
4443 u = fold (build (PLUS_EXPR, ptr_type_node, u,
4444 build_int_2 (UNITS_PER_WORD * nint, 0)));
4445 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
4446 TREE_SIDE_EFFECTS (t) = 1;
4447 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4448
4449 u = make_tree (ptr_type_node, nextarg);
4450 if (! stdarg_p && (nint == 0 || nfp == 0))
4451 {
4452 u = fold (build (PLUS_EXPR, ptr_type_node, u,
4453 build_int_2 (-UNITS_PER_WORD, -1)));
4454 }
4455 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
4456 TREE_SIDE_EFFECTS (t) = 1;
4457 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4458 }
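
/* A worked example of the pointer setup above, assuming UNITS_PER_WORD is
   4: for "int f (int a, ...)" there is one named int arg and no named
   float args, so nfp == 8 and nint == 3.  With u the address returned by
   __builtin_saveregs, the assignments above give

       next_fp       = u
       next_fp_limit = u + 8 * 4
       next_o        = u + 8 * 4
       next_o_limit  = u + 8 * 4 + 3 * 4
       next_stack    = nextarg

   (nextarg is backed up one word in the plain varargs case when either
   register class is entirely taken by named args).  */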
4459
4460 /* Implement `va_arg'. */
4461
4462 rtx
4463 sh_va_arg (valist, type)
4464 tree valist, type;
4465 {
4466 HOST_WIDE_INT size, rsize;
4467 tree tmp, pptr_type_node;
4468 rtx addr_rtx, r;
4469
4470 size = int_size_in_bytes (type);
4471 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
4472 pptr_type_node = build_pointer_type (ptr_type_node);
4473
4474 if ((TARGET_SH3E || TARGET_SH4) && ! TARGET_HITACHI)
4475 {
4476 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
4477 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
4478 int pass_as_float;
4479 rtx lab_false, lab_over;
4480
4481 f_next_o = TYPE_FIELDS (va_list_type_node);
4482 f_next_o_limit = TREE_CHAIN (f_next_o);
4483 f_next_fp = TREE_CHAIN (f_next_o_limit);
4484 f_next_fp_limit = TREE_CHAIN (f_next_fp);
4485 f_next_stack = TREE_CHAIN (f_next_fp_limit);
4486
4487 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
4488 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
4489 valist, f_next_o_limit);
4490 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
4491 valist, f_next_fp);
4492 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
4493 valist, f_next_fp_limit);
4494 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
4495 valist, f_next_stack);
4496
4497 if (TARGET_SH4)
4498 {
4499 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
4500 || (TREE_CODE (type) == COMPLEX_TYPE
4501 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
4502 && size <= 16));
4503 }
4504 else
4505 {
4506 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
4507 }
4508
4509 addr_rtx = gen_reg_rtx (Pmode);
4510 lab_false = gen_label_rtx ();
4511 lab_over = gen_label_rtx ();
4512
4513 if (pass_as_float)
4514 {
4515 emit_cmp_and_jump_insns (expand_expr (next_fp, NULL_RTX, Pmode,
4516 EXPAND_NORMAL),
4517 expand_expr (next_fp_limit, NULL_RTX,
4518 Pmode, EXPAND_NORMAL),
4519 GE, const1_rtx, Pmode, 1, lab_false);
4520
4521 if (TYPE_ALIGN (type) > BITS_PER_WORD)
4522 {
4523 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp,
4524 build_int_2 (UNITS_PER_WORD, 0));
4525 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
4526 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
4527 TREE_SIDE_EFFECTS (tmp) = 1;
4528 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
4529 }
4530
4531 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
4532 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
4533 if (r != addr_rtx)
4534 emit_move_insn (addr_rtx, r);
4535
4536 emit_jump_insn (gen_jump (lab_over));
4537 emit_barrier ();
4538 emit_label (lab_false);
4539
4540 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
4541 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
4542 if (r != addr_rtx)
4543 emit_move_insn (addr_rtx, r);
4544 }
4545 else
4546 {
4547 tmp = build (PLUS_EXPR, ptr_type_node, next_o,
4548 build_int_2 (rsize, 0));
4549
4550 emit_cmp_and_jump_insns (expand_expr (tmp, NULL_RTX, Pmode,
4551 EXPAND_NORMAL),
4552 expand_expr (next_o_limit, NULL_RTX,
4553 Pmode, EXPAND_NORMAL),
4554 GT, const1_rtx, Pmode, 1, lab_false);
4555
4556 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
4557 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
4558 if (r != addr_rtx)
4559 emit_move_insn (addr_rtx, r);
4560
4561 emit_jump_insn (gen_jump (lab_over));
4562 emit_barrier ();
4563 emit_label (lab_false);
4564
4565 if (size > 4 && ! TARGET_SH4)
4566 {
4567 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
4568 TREE_SIDE_EFFECTS (tmp) = 1;
4569 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
4570 }
4571
4572 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
4573 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
4574 if (r != addr_rtx)
4575 emit_move_insn (addr_rtx, r);
4576 }
4577
4578 emit_label (lab_over);
4579
4580 tmp = make_tree (pptr_type_node, addr_rtx);
4581 valist = build1 (INDIRECT_REF, ptr_type_node, tmp);
4582 }
4583
4584 /* ??? In va-sh.h, there had been code to make values larger than
4585 size 8 indirect. This does not match the FUNCTION_ARG macros. */
4586
4587 return std_expand_builtin_va_arg (valist, type);
4588 }
4589
4590 /* Define the offset between two registers, one to be eliminated, and
4591 the other its replacement, at the start of a routine. */
4592
4593 int
4594 initial_elimination_offset (from, to)
4595 int from;
4596 int to;
4597 {
4598 int regs_saved;
4599 int total_saved_regs_space;
4600 int total_auto_space;
4601 int save_flags = target_flags;
4602
4603 int live_regs_mask, live_regs_mask2;
4604 live_regs_mask = calc_live_regs (&regs_saved, &live_regs_mask2);
4605 total_auto_space = rounded_frame_size (regs_saved);
4606 target_flags = save_flags;
4607
4608 total_saved_regs_space = regs_saved;
4609
4610 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
4611 return total_saved_regs_space + total_auto_space;
4612
4613 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4614 return total_saved_regs_space + total_auto_space;
4615
4616 /* Initial gap between fp and sp is 0. */
4617 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4618 return 0;
4619
4620 if (from == RETURN_ADDRESS_POINTER_REGNUM
4621 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
4622 return total_auto_space;
4623
4624 abort ();
4625 }
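
/* For instance, if calc_live_regs reports 8 bytes of register saves and
   the rounded frame is 12 bytes, the argument pointer eliminates to the
   frame or stack pointer at offset 8 + 12 == 20, the return-address
   pointer at offset 12, and the frame pointer eliminates to the stack
   pointer at offset 0.  */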
4626 \f
4627 /* Handle machine-specific pragmas for semi-compatibility with the
4628 Hitachi compiler. */
4629
4630 void
4631 sh_pr_interrupt (pfile)
4632 cpp_reader *pfile ATTRIBUTE_UNUSED;
4633 {
4634 pragma_interrupt = 1;
4635 }
4636
4637 void
4638 sh_pr_trapa (pfile)
4639 cpp_reader *pfile ATTRIBUTE_UNUSED;
4640 {
4641 pragma_interrupt = pragma_trapa = 1;
4642 }
4643
4644 void
4645 sh_pr_nosave_low_regs (pfile)
4646 cpp_reader *pfile ATTRIBUTE_UNUSED;
4647 {
4648 pragma_nosave_low_regs = 1;
4649 }
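
/* In user source, these pragmas are written immediately before the
   function they are meant to affect; a minimal sketch (function names are
   illustrative):

       #pragma interrupt
       void isr (void) { ... }

       #pragma trapa
       void trap_handler (void) { ... }

   #pragma trapa also sets pragma_interrupt, but calc_live_regs then skips
   the blanket "save every register ever live" treatment.  */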
4650
4651 /* Generate the 'interrupt_handler' attribute for decls. */
4652
4653 static void
4654 sh_insert_attributes (node, attributes)
4655 tree node;
4656 tree * attributes;
4657 {
4658 if (! pragma_interrupt
4659 || TREE_CODE (node) != FUNCTION_DECL)
4660 return;
4661
4662 /* We are only interested in declarations. */
4663 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
4664 return;
4665
4666 /* Add an 'interrupt_handler' attribute. */
4667 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
4668
4669 return;
4670 }
4671
4672 /* Supported attributes:
4673
4674 interrupt_handler -- specifies this function is an interrupt handler.
4675
4676 sp_switch -- specifies an alternate stack for an interrupt handler
4677 to run on.
4678
4679 trap_exit -- use a trapa to exit an interrupt function instead of
4680 an rte instruction. */
4681
4682 const struct attribute_spec sh_attribute_table[] =
4683 {
4684 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
4685 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
4686 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
4687 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
4688 { NULL, 0, 0, false, false, false, NULL }
4689 };
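
/* In user source these attributes attach to the handler's declaration;
   a hedged sketch (identifiers are illustrative):

       void handler (void) __attribute__ ((interrupt_handler));

       void handler2 (void)
         __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                         trap_exit (11)));

   As the handlers below check, sp_switch wants a string constant naming
   the alternate-stack symbol and trap_exit wants an integer constant;
   both are rejected with a warning unless pragma_interrupt is set.  */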
4690
4691 /* Handle an "interrupt_handler" attribute; arguments as in
4692 struct attribute_spec.handler. */
4693 static tree
4694 sh_handle_interrupt_handler_attribute (node, name, args, flags, no_add_attrs)
4695 tree *node;
4696 tree name;
4697 tree args ATTRIBUTE_UNUSED;
4698 int flags ATTRIBUTE_UNUSED;
4699 bool *no_add_attrs;
4700 {
4701 if (TREE_CODE (*node) != FUNCTION_DECL)
4702 {
4703 warning ("`%s' attribute only applies to functions",
4704 IDENTIFIER_POINTER (name));
4705 *no_add_attrs = true;
4706 }
4707
4708 return NULL_TREE;
4709 }
4710
4711 /* Handle an "sp_switch" attribute; arguments as in
4712 struct attribute_spec.handler. */
4713 static tree
4714 sh_handle_sp_switch_attribute (node, name, args, flags, no_add_attrs)
4715 tree *node;
4716 tree name;
4717 tree args;
4718 int flags ATTRIBUTE_UNUSED;
4719 bool *no_add_attrs;
4720 {
4721 if (TREE_CODE (*node) != FUNCTION_DECL)
4722 {
4723 warning ("`%s' attribute only applies to functions",
4724 IDENTIFIER_POINTER (name));
4725 *no_add_attrs = true;
4726 }
4727 else if (!pragma_interrupt)
4728 {
4729 /* The sp_switch attribute only has meaning for interrupt functions. */
4730 warning ("`%s' attribute only applies to interrupt functions",
4731 IDENTIFIER_POINTER (name));
4732 *no_add_attrs = true;
4733 }
4734 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
4735 {
4736 /* The argument must be a constant string. */
4737 warning ("`%s' attribute argument not a string constant",
4738 IDENTIFIER_POINTER (name));
4739 *no_add_attrs = true;
4740 }
4741 else
4742 {
4743 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
4744 TREE_STRING_POINTER (TREE_VALUE (args)));
4745 }
4746
4747 return NULL_TREE;
4748 }
4749
4750 /* Handle a "trap_exit" attribute; arguments as in
4751 struct attribute_spec.handler. */
4752 static tree
4753 sh_handle_trap_exit_attribute (node, name, args, flags, no_add_attrs)
4754 tree *node;
4755 tree name;
4756 tree args;
4757 int flags ATTRIBUTE_UNUSED;
4758 bool *no_add_attrs;
4759 {
4760 if (TREE_CODE (*node) != FUNCTION_DECL)
4761 {
4762 warning ("`%s' attribute only applies to functions",
4763 IDENTIFIER_POINTER (name));
4764 *no_add_attrs = true;
4765 }
4766 else if (!pragma_interrupt)
4767 {
4768 /* The trap_exit attribute only has meaning for interrupt functions. */
4769 warning ("`%s' attribute only applies to interrupt functions",
4770 IDENTIFIER_POINTER (name));
4771 *no_add_attrs = true;
4772 }
4773 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
4774 {
4775 /* The argument must be a constant integer. */
4776 warning ("`%s' attribute argument not an integer constant",
4777 IDENTIFIER_POINTER (name));
4778 *no_add_attrs = true;
4779 }
4780 else
4781 {
4782 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
4783 }
4784
4785 return NULL_TREE;
4786 }
4787
4788 \f
4789 /* Predicates used by the templates. */
4790
4791 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
4792 Used only in general_movsrc_operand. */
4793
4794 int
4795 system_reg_operand (op, mode)
4796 rtx op;
4797 enum machine_mode mode ATTRIBUTE_UNUSED;
4798 {
4799 switch (REGNO (op))
4800 {
4801 case PR_REG:
4802 case MACL_REG:
4803 case MACH_REG:
4804 return 1;
4805 }
4806 return 0;
4807 }
4808
4809 /* Returns 1 if OP can be the source of a simple move operation.
4810 Same as general_operand, but a LABEL_REF is valid, while PRE_DEC is
4811 invalid, as are subregs of system registers. */
4812
4813 int
4814 general_movsrc_operand (op, mode)
4815 rtx op;
4816 enum machine_mode mode;
4817 {
4818 if (GET_CODE (op) == MEM)
4819 {
4820 rtx inside = XEXP (op, 0);
4821 if (GET_CODE (inside) == CONST)
4822 inside = XEXP (inside, 0);
4823
4824 if (GET_CODE (inside) == LABEL_REF)
4825 return 1;
4826
4827 if (GET_CODE (inside) == PLUS
4828 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
4829 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
4830 return 1;
4831
4832 /* Only post inc allowed. */
4833 if (GET_CODE (inside) == PRE_DEC)
4834 return 0;
4835 }
4836
4837 if ((mode == QImode || mode == HImode)
4838 && (GET_CODE (op) == SUBREG
4839 && GET_CODE (XEXP (op, 0)) == REG
4840 && system_reg_operand (XEXP (op, 0), mode)))
4841 return 0;
4842
4843 return general_operand (op, mode);
4844 }
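
/* For example, a post-increment load such as (mem (post_inc (reg Rn)))
   -- "mov.l @r1+,r0" -- is acceptable as a source here, while a
   pre-decrement address is rejected, as are QImode/HImode subregs of
   MACH, MACL or PR.  */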
4845
4846 /* Returns 1 if OP can be the destination of a move.
4847 Same as general_operand, but no post-increment is allowed. */
4848
4849 int
4850 general_movdst_operand (op, mode)
4851 rtx op;
4852 enum machine_mode mode;
4853 {
4854 /* Only pre dec allowed. */
4855 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
4856 return 0;
4857
4858 return general_operand (op, mode);
4859 }
4860
4861 /* Returns 1 if OP is a normal arithmetic register. */
4862
4863 int
4864 arith_reg_operand (op, mode)
4865 rtx op;
4866 enum machine_mode mode;
4867 {
4868 if (register_operand (op, mode))
4869 {
4870 int regno;
4871
4872 if (GET_CODE (op) == REG)
4873 regno = REGNO (op);
4874 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
4875 regno = REGNO (SUBREG_REG (op));
4876 else
4877 return 1;
4878
4879 return (regno != T_REG && regno != PR_REG
4880 && (regno != FPUL_REG || TARGET_SH4)
4881 && regno != MACH_REG && regno != MACL_REG);
4882 }
4883 return 0;
4884 }
4885
4886 int
4887 fp_arith_reg_operand (op, mode)
4888 rtx op;
4889 enum machine_mode mode;
4890 {
4891 if (register_operand (op, mode))
4892 {
4893 int regno;
4894
4895 if (GET_CODE (op) == REG)
4896 regno = REGNO (op);
4897 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
4898 regno = REGNO (SUBREG_REG (op));
4899 else
4900 return 1;
4901
4902 return (regno >= FIRST_PSEUDO_REGISTER
4903 || FP_REGISTER_P (regno));
4904 }
4905 return 0;
4906 }
4907
4908 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
4909
4910 int
4911 arith_operand (op, mode)
4912 rtx op;
4913 enum machine_mode mode;
4914 {
4915 if (arith_reg_operand (op, mode))
4916 return 1;
4917
4918 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
4919 return 1;
4920
4921 return 0;
4922 }
4923
4924 /* Returns 1 if OP is a valid source operand for a compare insn. */
4925
4926 int
4927 arith_reg_or_0_operand (op, mode)
4928 rtx op;
4929 enum machine_mode mode;
4930 {
4931 if (arith_reg_operand (op, mode))
4932 return 1;
4933
4934 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_N (INTVAL (op)))
4935 return 1;
4936
4937 return 0;
4938 }
4939
4940 /* Returns 1 if OP is a valid source operand for a logical operation. */
4941
4942 int
4943 logical_operand (op, mode)
4944 rtx op;
4945 enum machine_mode mode;
4946 {
4947 if (arith_reg_operand (op, mode))
4948 return 1;
4949
4950 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
4951 return 1;
4952
4953 return 0;
4954 }
4955
4956 /* Nonzero if OP is a floating point value with value 0.0. */
4957
4958 int
4959 fp_zero_operand (op)
4960 rtx op;
4961 {
4962 REAL_VALUE_TYPE r;
4963
4964 if (GET_MODE (op) != SFmode)
4965 return 0;
4966
4967 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
4968 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
4969 }
4970
4971 /* Nonzero if OP is a floating point value with value 1.0. */
4972
4973 int
4974 fp_one_operand (op)
4975 rtx op;
4976 {
4977 REAL_VALUE_TYPE r;
4978
4979 if (GET_MODE (op) != SFmode)
4980 return 0;
4981
4982 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
4983 return REAL_VALUES_EQUAL (r, dconst1);
4984 }
4985
4986 /* For -m4 and -m4-single-only, mode switching is used. If we are
4987 compiling without -mfmovd, movsf_ie isn't taken into account for
4988 mode switching. We could check in machine_dependent_reorg for
4989 cases where we know we are in single precision mode, but there is no
4990 interface to find that out during reload, so we must avoid
4991 choosing an fldi alternative during reload and thus failing to
4992 allocate a scratch register for the constant loading. */
4993 int
4994 fldi_ok ()
4995 {
4996 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
4997 }
4998
4999 int
5000 tertiary_reload_operand (op, mode)
5001 rtx op;
5002 enum machine_mode mode ATTRIBUTE_UNUSED;
5003 {
5004 enum rtx_code code = GET_CODE (op);
5005 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
5006 }
5007
5008 int
5009 fpscr_operand (op, mode)
5010 rtx op;
5011 enum machine_mode mode ATTRIBUTE_UNUSED;
5012 {
5013 return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
5014 && GET_MODE (op) == PSImode);
5015 }
5016
5017 int
5018 fpul_operand (op, mode)
5019 rtx op;
5020 enum machine_mode mode;
5021 {
5022 return (GET_CODE (op) == REG
5023 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
5024 && GET_MODE (op) == mode);
5025 }
5026
5027 int
5028 symbol_ref_operand (op, mode)
5029 rtx op;
5030 enum machine_mode mode ATTRIBUTE_UNUSED;
5031 {
5032 return (GET_CODE (op) == SYMBOL_REF);
5033 }
5034
5035 int
5036 commutative_float_operator (op, mode)
5037 rtx op;
5038 enum machine_mode mode;
5039 {
5040 if (GET_MODE (op) != mode)
5041 return 0;
5042 switch (GET_CODE (op))
5043 {
5044 case PLUS:
5045 case MULT:
5046 return 1;
5047 default:
5048 break;
5049 }
5050 return 0;
5051 }
5052
5053 int
5054 noncommutative_float_operator (op, mode)
5055 rtx op;
5056 enum machine_mode mode;
5057 {
5058 if (GET_MODE (op) != mode)
5059 return 0;
5060 switch (GET_CODE (op))
5061 {
5062 case MINUS:
5063 case DIV:
5064 return 1;
5065 default:
5066 break;
5067 }
5068 return 0;
5069 }
5070
5071 int
5072 binary_float_operator (op, mode)
5073 rtx op;
5074 enum machine_mode mode;
5075 {
5076 if (GET_MODE (op) != mode)
5077 return 0;
5078 switch (GET_CODE (op))
5079 {
5080 case PLUS:
5081 case MINUS:
5082 case MULT:
5083 case DIV:
5084 return 1;
5085 default:
5086 break;
5087 }
5088 return 0;
5089 }
5090 \f
5091 /* Return the destination address of a branch. */
5092
5093 static int
5094 branch_dest (branch)
5095 rtx branch;
5096 {
5097 rtx dest = SET_SRC (PATTERN (branch));
5098 int dest_uid;
5099
5100 if (GET_CODE (dest) == IF_THEN_ELSE)
5101 dest = XEXP (dest, 1);
5102 dest = XEXP (dest, 0);
5103 dest_uid = INSN_UID (dest);
5104 return INSN_ADDRESSES (dest_uid);
5105 }
5106 \f
5107 /* Return non-zero if REG is not used after INSN.
5108 We assume REG is a reload reg, and therefore does
5109 not live past labels. It may live past calls or jumps though. */
5110 int
5111 reg_unused_after (reg, insn)
5112 rtx reg;
5113 rtx insn;
5114 {
5115 enum rtx_code code;
5116 rtx set;
5117
5118 /* If the reg is set by this instruction, then it is safe for our
5119 case. Disregard the case where this is a store to memory, since
5120 we are checking a register used in the store address. */
5121 set = single_set (insn);
5122 if (set && GET_CODE (SET_DEST (set)) != MEM
5123 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
5124 return 1;
5125
5126 while ((insn = NEXT_INSN (insn)))
5127 {
5128 code = GET_CODE (insn);
5129
5130 #if 0
5131 /* If this is a label that existed before reload, then the register
5132 is dead here. However, if this is a label added by reorg, then
5133 the register may still be live here. We can't tell the difference,
5134 so we just ignore labels completely. */
5135 if (code == CODE_LABEL)
5136 return 1;
5137 /* else */
5138 #endif
5139
5140 if (code == JUMP_INSN)
5141 return 0;
5142
5143 /* If this is a sequence, we must handle them all at once.
5144 We could have for instance a call that sets the target register,
5145 and an insn in a delay slot that uses the register. In this case,
5146 we must return 0. */
5147 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
5148 {
5149 int i;
5150 int retval = 0;
5151
5152 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
5153 {
5154 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
5155 rtx set = single_set (this_insn);
5156
5157 if (GET_CODE (this_insn) == CALL_INSN)
5158 code = CALL_INSN;
5159 else if (GET_CODE (this_insn) == JUMP_INSN)
5160 {
5161 if (INSN_ANNULLED_BRANCH_P (this_insn))
5162 return 0;
5163 code = JUMP_INSN;
5164 }
5165
5166 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
5167 return 0;
5168 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
5169 {
5170 if (GET_CODE (SET_DEST (set)) != MEM)
5171 retval = 1;
5172 else
5173 return 0;
5174 }
5175 if (set == 0
5176 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
5177 return 0;
5178 }
5179 if (retval == 1)
5180 return 1;
5181 else if (code == JUMP_INSN)
5182 return 0;
5183 }
5184 else if (GET_RTX_CLASS (code) == 'i')
5185 {
5186 rtx set = single_set (insn);
5187
5188 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
5189 return 0;
5190 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
5191 return GET_CODE (SET_DEST (set)) != MEM;
5192 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
5193 return 0;
5194 }
5195
5196 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
5197 return 1;
5198 }
5199 return 1;
5200 }
5201 \f
5202 #include "ggc.h"
5203
5204 rtx
5205 get_fpscr_rtx ()
5206 {
5207 static rtx fpscr_rtx;
5208
5209 if (! fpscr_rtx)
5210 {
5211 fpscr_rtx = gen_rtx (REG, PSImode, FPSCR_REG);
5212 REG_USERVAR_P (fpscr_rtx) = 1;
5213 ggc_add_rtx_root (&fpscr_rtx, 1);
5214 mark_user_reg (fpscr_rtx);
5215 }
5216 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
5217 mark_user_reg (fpscr_rtx);
5218 return fpscr_rtx;
5219 }
5220
5221 void
5222 emit_sf_insn (pat)
5223 rtx pat;
5224 {
5225 emit_insn (pat);
5226 }
5227
5228 void
5229 emit_df_insn (pat)
5230 rtx pat;
5231 {
5232 emit_insn (pat);
5233 }
5234
5235 void
5236 expand_sf_unop (fun, operands)
5237 rtx (*fun) PARAMS ((rtx, rtx, rtx));
5238 rtx *operands;
5239 {
5240 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
5241 }
5242
5243 void
5244 expand_sf_binop (fun, operands)
5245 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
5246 rtx *operands;
5247 {
5248 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
5249 get_fpscr_rtx ()));
5250 }
5251
5252 void
5253 expand_df_unop (fun, operands)
5254 rtx (*fun) PARAMS ((rtx, rtx, rtx));
5255 rtx *operands;
5256 {
5257 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
5258 }
5259
5260 void
5261 expand_df_binop (fun, operands)
5262 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
5263 rtx *operands;
5264 {
5265 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
5266 get_fpscr_rtx ()));
5267 }
5268 \f
5269 /* ??? gcc does flow analysis strictly after common subexpression
5270 elimination. As a result, common subexpression elimination fails
5271 when there are some intervening statements setting the same register.
5272 If we did nothing about this, this would hurt the precision switching
5273 for SH4 badly. There is some cse after reload, but it is unable to
5274 undo the extra register pressure from the unused instructions, and
5275 it cannot remove auto-increment loads.
5276
5277 A C code example that shows this flow/cse weakness for (at least) SH
5278 and sparc (as of gcc ss-970706) is this:
5279
5280 double
5281 f(double a)
5282 {
5283 double d;
5284 d = 0.1;
5285 a += d;
5286 d = 1.1;
5287 d = 0.1;
5288 a *= d;
5289 return a;
5290 }
5291
5292 So we add another pass before common subexpression elimination, to
5293 remove assignments that are dead due to a following assignment in the
5294 same basic block. */
5295
5296 static void
5297 mark_use (x, reg_set_block)
5298 rtx x, *reg_set_block;
5299 {
5300 enum rtx_code code;
5301
5302 if (! x)
5303 return;
5304 code = GET_CODE (x);
5305 switch (code)
5306 {
5307 case REG:
5308 {
5309 int regno = REGNO (x);
5310 int nregs = (regno < FIRST_PSEUDO_REGISTER
5311 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
5312 : 1);
5313 do
5314 {
5315 reg_set_block[regno + nregs - 1] = 0;
5316 }
5317 while (--nregs);
5318 break;
5319 }
5320 case SET:
5321 {
5322 rtx dest = SET_DEST (x);
5323
5324 if (GET_CODE (dest) == SUBREG)
5325 dest = SUBREG_REG (dest);
5326 if (GET_CODE (dest) != REG)
5327 mark_use (dest, reg_set_block);
5328 mark_use (SET_SRC (x), reg_set_block);
5329 break;
5330 }
5331 case CLOBBER:
5332 break;
5333 default:
5334 {
5335 const char *fmt = GET_RTX_FORMAT (code);
5336 int i, j;
5337 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5338 {
5339 if (fmt[i] == 'e')
5340 mark_use (XEXP (x, i), reg_set_block);
5341 else if (fmt[i] == 'E')
5342 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5343 mark_use (XVECEXP (x, i, j), reg_set_block);
5344 }
5345 break;
5346 }
5347 }
5348 }
5349 \f
5350 static rtx get_free_reg PARAMS ((HARD_REG_SET));
5351
5352 /* This function returns a register to use for loading the address from
5353 which the fpscr is loaded. Currently it always returns r1 or r7, but when we are
5354 able to use pseudo registers after combine, or have a better mechanism
5355 for choosing a register, it should be done here. */
5356 /* REGS_LIVE is the liveness information for the point for which we
5357 need this allocation. In some bare-bones exit blocks, r1 is live at the
5358 start. We can even have all of r0..r3 being live:
5359 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
5360 The new insns placed before INSN will clobber the register
5361 we return. If a basic block consists only of setting the return value
5362 register to a pseudo and using that register, the return value is not
5363 live before or after this block, yet we'll insert our insns right in
5364 the middle. */
5365
5366 static rtx
5367 get_free_reg (regs_live)
5368 HARD_REG_SET regs_live;
5369 {
5370 if (! TEST_HARD_REG_BIT (regs_live, 1))
5371 return gen_rtx_REG (Pmode, 1);
5372
5373 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
5374 there shouldn't be anything but a jump before the function end. */
5375 if (! TEST_HARD_REG_BIT (regs_live, 7))
5376 return gen_rtx_REG (Pmode, 7);
5377
5378 abort ();
5379 }
5380
5381 /* This function will set the fpscr from memory.
5382 MODE is the mode we are setting it to. */
5383 void
5384 fpscr_set_from_mem (mode, regs_live)
5385 int mode;
5386 HARD_REG_SET regs_live;
5387 {
5388 enum attr_fp_mode fp_mode = mode;
5389 rtx addr_reg = get_free_reg (regs_live);
5390
5391 if (fp_mode == (enum attr_fp_mode) NORMAL_MODE (FP_MODE))
5392 emit_insn (gen_fpu_switch1 (addr_reg));
5393 else
5394 emit_insn (gen_fpu_switch0 (addr_reg));
5395 }
5396
5397 /* Is the given character a logical line separator for the assembler? */
5398 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
5399 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
5400 #endif
5401
5402 int
5403 sh_insn_length_adjustment (insn)
5404 rtx insn;
5405 {
5406 /* Instructions with unfilled delay slots take up an extra two bytes for
5407 the nop in the delay slot. */
5408 if (((GET_CODE (insn) == INSN
5409 && GET_CODE (PATTERN (insn)) != USE
5410 && GET_CODE (PATTERN (insn)) != CLOBBER)
5411 || GET_CODE (insn) == CALL_INSN
5412 || (GET_CODE (insn) == JUMP_INSN
5413 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
5414 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
5415 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
5416 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
5417 return 2;
5418
5419 /* sh-dsp parallel processing insns take four bytes instead of two. */
5420
5421 if (GET_CODE (insn) == INSN)
5422 {
5423 int sum = 0;
5424 rtx body = PATTERN (insn);
5425 const char *template;
5426 char c;
5427 int maybe_label = 1;
5428
5429 if (GET_CODE (body) == ASM_INPUT)
5430 template = XSTR (body, 0);
5431 else if (asm_noperands (body) >= 0)
5432 template
5433 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
5434 else
5435 return 0;
5436 do
5437 {
5438 int ppi_adjust = 0;
5439
5440 do
5441 c = *template++;
5442 while (c == ' ' || c == '\t');
5443 /* All sh-dsp parallel-processing insns start with p.
5444 The only non-ppi sh insn starting with p is pref.
5445 The only ppi starting with pr is prnd. */
5446 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
5447 ppi_adjust = 2;
5448 /* The repeat pseudo-insn expands to three insns, a total of
5449 six bytes in size. */
5450 else if ((c == 'r' || c == 'R')
5451 && ! strncasecmp ("epeat", template, 5))
5452 ppi_adjust = 4;
5453 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
5454 {
5455 /* If this is a label, it is obviously not a ppi insn. */
5456 if (c == ':' && maybe_label)
5457 {
5458 ppi_adjust = 0;
5459 break;
5460 }
5461 else if (c == '\'' || c == '"')
5462 maybe_label = 0;
5463 c = *template++;
5464 }
5465 sum += ppi_adjust;
5466 maybe_label = c != ':';
5467 }
5468 while (c);
5469 return sum;
5470 }
5471 return 0;
5472 }
5473 \f
5474 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
5475 isn't protected by a PIC unspec. */
5476 int
5477 nonpic_symbol_mentioned_p (x)
5478 rtx x;
5479 {
5480 register const char *fmt;
5481 register int i;
5482
5483 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
5484 return 1;
5485
5486 if (GET_CODE (x) == UNSPEC
5487 && (XINT (x, 1) == UNSPEC_PIC
5488 || XINT (x, 1) == UNSPEC_GOT
5489 || XINT (x, 1) == UNSPEC_GOTOFF
5490 || XINT (x, 1) == UNSPEC_PLT))
5491 return 0;
5492
5493 fmt = GET_RTX_FORMAT (GET_CODE (x));
5494 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5495 {
5496 if (fmt[i] == 'E')
5497 {
5498 register int j;
5499
5500 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5501 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
5502 return 1;
5503 }
5504 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
5505 return 1;
5506 }
5507
5508 return 0;
5509 }
5510
5511 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
5512 @GOTOFF in `reg'. */
5513 rtx
5514 legitimize_pic_address (orig, mode, reg)
5515 rtx orig;
5516 enum machine_mode mode ATTRIBUTE_UNUSED;
5517 rtx reg;
5518 {
5519 if (GET_CODE (orig) == LABEL_REF
5520 || (GET_CODE (orig) == SYMBOL_REF
5521 && (CONSTANT_POOL_ADDRESS_P (orig)
5522 /* SYMBOL_REF_FLAG is set on static symbols. */
5523 || SYMBOL_REF_FLAG (orig))))
5524 {
5525 if (reg == 0)
5526 reg = gen_reg_rtx (Pmode);
5527
5528 emit_insn (gen_symGOTOFF2reg (reg, orig));
5529 return reg;
5530 }
5531 else if (GET_CODE (orig) == SYMBOL_REF)
5532 {
5533 if (reg == 0)
5534 reg = gen_reg_rtx (Pmode);
5535
5536 emit_insn (gen_symGOT2reg (reg, orig));
5537 return reg;
5538 }
5539 return orig;
5540 }
5541
5542 /* Mark the use of a constant in the literal table. If the constant
5543 has multiple labels, make it unique. */
5544 static rtx mark_constant_pool_use (x)
5545 rtx x;
5546 {
5547 rtx insn, lab, pattern;
5548
5549 if (x == NULL)
5550 return x;
5551
5552 switch (GET_CODE (x))
5553 {
5554 case LABEL_REF:
5555 x = XEXP (x, 0);
5556 case CODE_LABEL:
5557 break;
5558 default:
5559 return x;
5560 }
5561
5562 /* Get the first label in the list of labels for the same constant
5563 and delete the other labels in the list. */
5564 lab = x;
5565 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
5566 {
5567 if (GET_CODE (insn) != CODE_LABEL
5568 || LABEL_REFS (insn) != NEXT_INSN (insn))
5569 break;
5570 lab = insn;
5571 }
5572
5573 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
5574 INSN_DELETED_P (insn) = 1;
5575
5576 /* Mark constants in a window. */
5577 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
5578 {
5579 if (GET_CODE (insn) != INSN)
5580 continue;
5581
5582 pattern = PATTERN (insn);
5583 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
5584 continue;
5585
5586 switch (XINT (pattern, 1))
5587 {
5588 case UNSPECV_CONST2:
5589 case UNSPECV_CONST4:
5590 case UNSPECV_CONST8:
5591 XVECEXP (pattern, 0, 1) = const1_rtx;
5592 break;
5593 case UNSPECV_WINDOW_END:
5594 if (XVECEXP (pattern, 0, 0) == x)
5595 return lab;
5596 break;
5597 case UNSPECV_CONST_END:
5598 return lab;
5599 default:
5600 break;
5601 }
5602 }
5603
5604 return lab;
5605 }
5606 \f
5607 /* Return true if it's possible to redirect BRANCH1 to the destination
5608 of an unconditional jump BRANCH2. We only want to do this if the
5609 resulting branch will have a short displacement. */
5610 int
5611 sh_can_redirect_branch (branch1, branch2)
5612 rtx branch1;
5613 rtx branch2;
5614 {
5615 if (flag_expensive_optimizations && simplejump_p (branch2))
5616 {
5617 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
5618 rtx insn;
5619 int distance;
5620
5621 for (distance = 0, insn = NEXT_INSN (branch1);
5622 insn && distance < 256;
5623 insn = PREV_INSN (insn))
5624 {
5625 if (insn == dest)
5626 return 1;
5627 else
5628 distance += get_attr_length (insn);
5629 }
5630 for (distance = 0, insn = NEXT_INSN (branch1);
5631 insn && distance < 256;
5632 insn = NEXT_INSN (insn))
5633 {
5634 if (insn == dest)
5635 return 1;
5636 else
5637 distance += get_attr_length (insn);
5638 }
5639 }
5640 return 0;
5641 }
5642
5643 #ifndef OBJECT_FORMAT_ELF
5644 static void
5645 sh_asm_named_section (name, flags)
5646 const char *name;
5647 unsigned int flags ATTRIBUTE_UNUSED;
5648 {
5649 /* ??? Perhaps we should be using default_coff_asm_named_section. */
5650 fprintf (asm_out_file, "\t.section %s\n", name);
5651 }
5652 #endif /* ! OBJECT_FORMAT_ELF */
5653
5654 /* A C statement (sans semicolon) to update the integer variable COST
5655 based on the relationship between INSN that is dependent on
5656 DEP_INSN through the dependence LINK. The default is to make no
5657 adjustment to COST. This can be used for example to specify to
5658 the scheduler that an output- or anti-dependence does not incur
5659 the same cost as a data-dependence. */
5660 static int
5661 sh_adjust_cost (insn, link, dep_insn, cost)
5662 rtx insn;
5663 rtx link ATTRIBUTE_UNUSED;
5664 rtx dep_insn;
5665 int cost;
5666 {
5667 rtx reg;
5668
5669 if (GET_CODE(insn) == CALL_INSN)
5670 {
5671 /* The only input for a call that is timing-critical is the
5672 function's address. */
5673 rtx call = PATTERN (insn);
5674
5675 if (GET_CODE (call) == PARALLEL)
5676 call = XVECEXP (call, 0 ,0);
5677 if (GET_CODE (call) == SET)
5678 call = SET_SRC (call);
5679 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
5680 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
5681 cost = 0;
5682 }
5683 /* All sfunc calls are parallels with at least four components.
5684 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
5685 else if (GET_CODE (PATTERN (insn)) == PARALLEL
5686 && XVECLEN (PATTERN (insn), 0) >= 4
5687 && (reg = sfunc_uses_reg (insn)))
5688 {
5689 /* Likewise, the most timing-critical input for an sfunc call
5690 is the function address. However, sfuncs typically start
5691 using their arguments pretty quickly.
5692 Assume a four cycle delay before they are needed. */
5693 if (! reg_set_p (reg, dep_insn))
5694 cost -= TARGET_SUPERSCALAR ? 40 : 4;
5695 }
5696 /* Adjust the latency of load_si / pcload_si type insns. Use the known
5697 nominal latency and form of the insn to speed up the check. */
5698 else if (cost == 3
5699 && GET_CODE (PATTERN (dep_insn)) == SET
5700 /* Latency for dmpy type insns is also 3, so check that
5701 it's actually a move insn. */
5702 && general_movsrc_operand (SET_SRC (PATTERN (dep_insn)), SImode))
5703 cost = 2;
5704 else if (cost == 30
5705 && GET_CODE (PATTERN (dep_insn)) == SET
5706 && GET_MODE (SET_SRC (PATTERN (dep_insn))) == SImode)
5707 cost = 20;
5708
5709 return cost;
5710 }
5711
5712 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
5713 'special function' patterns (type sfunc) that clobber pr, but that
5714 do not look like function calls to leaf_function_p. Hence we must
5715 do this extra check. */
5716 int
5717 sh_pr_n_sets ()
5718 {
5719 return REG_N_SETS (PR_REG);
5720 }