/* Output routines for GCC for Hitachi Super-H.
   Copyright (C) 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001
   Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

   This file is part of GNU CC.

   GNU CC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GNU CC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GNU CC; see the file COPYING.  If not, write to
   the Free Software Foundation, 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "insn-config.h"
#include "rtl.h"
#include "tree.h"
#include "flags.h"
#include "insn-flags.h"
#include "expr.h"
#include "function.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "output.h"
#include "insn-attr.h"
#include "toplev.h"
#include "recog.h"
#include "c-pragma.h"
#include "tm_p.h"

int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)

/* ??? The pragma interrupt support will not work for SH3.  */
/* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
   output code for the next function appropriate for an interrupt handler.  */
int pragma_interrupt;

/* This is set by the trap_exit attribute for functions.  It specifies
   a trap number to be used in a trapa instruction at function exit
   (instead of an rte instruction).  */
int trap_exit;

/* This is used by the sp_switch attribute for functions.  It specifies
   a variable holding the address of the stack the interrupt function
   should switch to/from at entry/exit.  */
rtx sp_switch;

/* This is set by #pragma trapa, and is similar to the above, except that
   the compiler doesn't emit code to preserve all registers.  */
static int pragma_trapa;

/* This is set by #pragma nosave_low_regs.  This is useful on the SH3,
   which has a separate set of low regs for User and Supervisor modes.
   This should only be used for the lowest level of interrupts.  Higher
   levels of interrupts must save the registers in case they themselves
   are interrupted.  */
int pragma_nosave_low_regs;

/* This is used for communication between SETUP_INCOMING_VARARGS and
   sh_expand_prologue.  */
int current_function_anonymous_args;

/* Global variables for machine-dependent things.  */

/* Which CPU we are scheduling for.  */
enum processor_type sh_cpu;

/* Saved operands from the last compare to use when we generate an scc
   or bcc insn.  */

rtx sh_compare_op0;
rtx sh_compare_op1;

/* Provides the class number of the smallest class containing each
   register number.  */

int regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, PR_REGS, T_REGS, NO_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  FPSCR_REGS,
};

char fp_reg_names[][5] =
{
  "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7",
  "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15",
  "fpul",
  "xd0", "xd2", "xd4", "xd6", "xd8", "xd10", "xd12", "xd14",
};

/* Provide reg_class from a letter such as appears in the machine
   description.  */

enum reg_class reg_class_from_letter[] =
{
  /* a */ ALL_REGS,  /* b */ NO_REGS, /* c */ FPSCR_REGS,   /* d */ DF_REGS,
  /* e */ NO_REGS,   /* f */ FP_REGS, /* g */ NO_REGS,      /* h */ NO_REGS,
  /* i */ NO_REGS,   /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
  /* m */ NO_REGS,   /* n */ NO_REGS, /* o */ NO_REGS,      /* p */ NO_REGS,
  /* q */ NO_REGS,   /* r */ NO_REGS, /* s */ NO_REGS,      /* t */ T_REGS,
  /* u */ NO_REGS,   /* v */ NO_REGS, /* w */ FP0_REGS,     /* x */ MAC_REGS,
  /* y */ FPUL_REGS, /* z */ R0_REGS
};

int assembler_dialect;

static void split_branches PARAMS ((rtx));
static int branch_dest PARAMS ((rtx));
static void force_into PARAMS ((rtx, rtx));
static void print_slot PARAMS ((rtx));
static rtx add_constant PARAMS ((rtx, enum machine_mode, rtx));
static void dump_table PARAMS ((rtx));
static int hi_const PARAMS ((rtx));
static int broken_move PARAMS ((rtx));
static int mova_p PARAMS ((rtx));
static rtx find_barrier PARAMS ((int, rtx, rtx));
static int noncall_uses_reg PARAMS ((rtx, rtx, rtx *));
static rtx gen_block_redirect PARAMS ((rtx, int, int));
static void output_stack_adjust PARAMS ((int, rtx, int));
static void push PARAMS ((int));
static void pop PARAMS ((int));
static void push_regs PARAMS ((int, int));
static int calc_live_regs PARAMS ((int *, int *));
static void mark_use PARAMS ((rtx, rtx *));
static HOST_WIDE_INT rounded_frame_size PARAMS ((int));
\f
/* Print the operand address in x to the stream.  */

void
print_operand_address (stream, x)
     FILE *stream;
     rtx x;
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
      break;

    case PLUS:
      {
        rtx base = XEXP (x, 0);
        rtx index = XEXP (x, 1);

        switch (GET_CODE (index))
          {
          case CONST_INT:
            fprintf (stream, "@(%d,%s)", INTVAL (index),
                     reg_names[true_regnum (base)]);
            break;

          case REG:
          case SUBREG:
            {
              int base_num = true_regnum (base);
              int index_num = true_regnum (index);

              fprintf (stream, "@(r0,%s)",
                       reg_names[MAX (base_num, index_num)]);
              break;
            }

          default:
            debug_rtx (x);
            abort ();
          }
      }
      break;

    case PRE_DEC:
      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    case POST_INC:
      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    default:
      output_addr_const (stream, x);
      break;
    }
}

/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'o'  output an operator.  */

void
print_operand (stream, x, code)
     FILE *stream;
     rtx x;
     int code;
{
  switch (code)
    {
    case '.':
      if (final_sequence
          && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
        fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      break;
    case ',':
      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
      break;
    case '@':
      {
        int interrupt_handler;

        if ((lookup_attribute
             ("interrupt_handler",
              DECL_MACHINE_ATTRIBUTES (current_function_decl)))
            != NULL_TREE)
          interrupt_handler = 1;
        else
          interrupt_handler = 0;

        if (trap_exit)
          fprintf (stream, "trapa #%d", trap_exit);
        else if (interrupt_handler)
          fprintf (stream, "rte");
        else
          fprintf (stream, "rts");
        break;
      }
    case '#':
      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
        fprintf (stream, "\n\tnop");
      break;
    case 'O':
      output_addr_const (stream, x);
      break;
    case 'R':
      fputs (reg_names[REGNO (x) + LSW], (stream));
      break;
    case 'S':
      fputs (reg_names[REGNO (x) + MSW], (stream));
      break;
    case 'T':
      /* Next word of a double.  */
      switch (GET_CODE (x))
        {
        case REG:
          fputs (reg_names[REGNO (x) + 1], (stream));
          break;
        case MEM:
          if (GET_CODE (XEXP (x, 0)) != PRE_DEC
              && GET_CODE (XEXP (x, 0)) != POST_INC)
            x = adj_offsettable_operand (x, 4);
          print_operand_address (stream, XEXP (x, 0));
          break;
        default:
          break;
        }
      break;
    case 'o':
      switch (GET_CODE (x))
        {
        case PLUS:  fputs ("add", stream); break;
        case MINUS: fputs ("sub", stream); break;
        case MULT:  fputs ("mul", stream); break;
        case DIV:   fputs ("div", stream); break;
        default:
          break;
        }
      break;
    default:
      switch (GET_CODE (x))
        {
        case REG:
          if (FP_REGISTER_P (REGNO (x))
              && GET_MODE_SIZE (GET_MODE (x)) > 4)
            fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
          else
            fputs (reg_names[REGNO (x)], (stream));
          break;
        case MEM:
          output_address (XEXP (x, 0));
          break;
        default:
          fputc ('#', stream);
          output_addr_const (stream, x);
          break;
        }
      break;
    }
}
\f
/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (value, target)
     rtx value, target;
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}

/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */

int
expand_block_move (operands)
     rtx *operands;
{
  int align = INTVAL (operands[3]);
  int constp = (GET_CODE (operands[2]) == CONST_INT);
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (! constp || align < 4 || (bytes % 4 != 0))
    return 0;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
        return 0;
      else if (bytes == 12)
        {
          tree entry_name;
          rtx sym;
          rtx func_addr_rtx;
          rtx r4 = gen_rtx (REG, SImode, 4);
          rtx r5 = gen_rtx (REG, SImode, 5);

          entry_name = get_identifier ("__movstrSI12_i4");

          sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
          func_addr_rtx = copy_to_mode_reg (Pmode, sym);
          force_into (XEXP (operands[0], 0), r4);
          force_into (XEXP (operands[1], 0), r5);
          emit_insn (gen_block_move_real_i4 (func_addr_rtx));
          return 1;
        }
      else if (! TARGET_SMALLCODE)
        {
          tree entry_name;
          rtx sym;
          rtx func_addr_rtx;
          int dwords;
          rtx r4 = gen_rtx (REG, SImode, 4);
          rtx r5 = gen_rtx (REG, SImode, 5);
          rtx r6 = gen_rtx (REG, SImode, 6);

          entry_name = get_identifier (bytes & 4
                                       ? "__movstr_i4_odd"
                                       : "__movstr_i4_even");
          sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
          func_addr_rtx = copy_to_mode_reg (Pmode, sym);
          force_into (XEXP (operands[0], 0), r4);
          force_into (XEXP (operands[1], 0), r5);

          dwords = bytes >> 3;
          emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
          emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
          return 1;
        }
      else
        return 0;
    }
  if (bytes < 64)
    {
      char entry[30];
      tree entry_name;
      rtx sym;
      rtx func_addr_rtx;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movstrSI%d", bytes);
      entry_name = get_identifier (entry);
      sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
      func_addr_rtx = copy_to_mode_reg (Pmode, sym);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return 1;
    }

  /* This is the same number of bytes as a memcpy call, but to a different,
     less common function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
    {
      tree entry_name;
      rtx sym;
      rtx func_addr_rtx;
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      entry_name = get_identifier ("__movstr");
      sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (entry_name));
      func_addr_rtx = copy_to_mode_reg (Pmode, sym);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
         for each 64 bytes moved.  Then the negative bit left over is used
         as an index into a list of move instructions.  e.g., a 72 byte move
         would be set up with size(r6) = 14, for one iteration through the
         big while loop, and a switch of -2 for the last part.  */

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return 1;
    }

  return 0;
}
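
/* An expository sketch (kept under #if 0; the helper name is made up for
   illustration and is not used anywhere): how the r6 argument for the
   __movstr call above is derived from the byte count.  The computation
   simply mirrors the one in expand_block_move.  */
#if 0
static int
movstr_r6_value (bytes)
     int bytes;
{
  /* E.g. bytes == 136: bytes / 4 == 34, so final_switch == 16 - (34 % 16)
     == 14 and while_loop == (34 / 16 - 1) * 16 == 16, giving r6 == 30.
     Two passes of the big loop (30 -> 14 -> -2) move 128 bytes, and the
     leftover switch index of -2 moves the remaining 8 bytes.  */
  int final_switch = 16 - ((bytes / 4) % 16);
  int while_loop = ((bytes / 4) / 16 - 1) * 16;
  return while_loop + final_switch;
}
#endif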

/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */

int
prepare_move_operands (operands, mode)
     rtx operands[];
     enum machine_mode mode;
{
  if (mode == SImode && flag_pic)
    {
      rtx temp;
      if (SYMBOLIC_CONST_P (operands[1]))
        {
          if (GET_CODE (operands[0]) == MEM)
            operands[1] = force_reg (Pmode, operands[1]);
          else
            {
              temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
              operands[1] = legitimize_pic_address (operands[1], SImode, temp);
            }
        }
      else if (GET_CODE (operands[1]) == CONST
               && GET_CODE (XEXP (operands[1], 0)) == PLUS
               && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
        {
          temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
                                         SImode, gen_reg_rtx (Pmode));
          operands[1] = expand_binop (SImode, add_optab, temp,
                                      XEXP (XEXP (operands[1], 0), 1),
                                      gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
        }
    }

  if (! reload_in_progress && ! reload_completed)
    {
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
          && ! register_operand (operands[1], mode))
        operands[1] = copy_to_mode_reg (mode, operands[1]);

      /* This case can happen while generating code to move the result
         of a library call to the target.  Reject `st r0,@(rX,rY)' because
         reload will fail to find a spill register for rX, since r0 is already
         being used for the source.  */
      else if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 0
               && GET_CODE (operands[0]) == MEM
               && GET_CODE (XEXP (operands[0], 0)) == PLUS
               && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
        operands[1] = copy_to_mode_reg (mode, operands[1]);
    }

  return 0;
}

/* Prepare the operands for an scc instruction; make sure that the
   compare has been done.  */
rtx
prepare_scc_operands (code)
     enum rtx_code code;
{
  rtx t_reg = gen_rtx_REG (SImode, T_REG);
  enum rtx_code oldcode = code;
  enum machine_mode mode;

  /* First need a compare insn.  */
  switch (code)
    {
    case NE:
      /* It isn't possible to handle this case.  */
      abort ();
    case LT:
      code = GT;
      break;
    case LE:
      code = GE;
      break;
    case LTU:
      code = GTU;
      break;
    case LEU:
      code = GEU;
      break;
    default:
      break;
    }
  if (code != oldcode)
    {
      rtx tmp = sh_compare_op0;
      sh_compare_op0 = sh_compare_op1;
      sh_compare_op1 = tmp;
    }

  mode = GET_MODE (sh_compare_op0);
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);

  sh_compare_op0 = force_reg (mode, sh_compare_op0);
  if ((code != EQ && code != NE
       && (sh_compare_op1 != const0_rtx
           || code == GTU || code == GEU || code == LTU || code == LEU))
      || (mode == DImode && sh_compare_op1 != const0_rtx)
      || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    sh_compare_op1 = force_reg (mode, sh_compare_op1);

  if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
    (mode == SFmode ? emit_sf_insn : emit_df_insn)
      (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2,
         gen_rtx (SET, VOIDmode, t_reg,
                  gen_rtx (code, SImode,
                           sh_compare_op0, sh_compare_op1)),
         gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))));
  else
    emit_insn (gen_rtx (SET, VOIDmode, t_reg,
                        gen_rtx (code, SImode, sh_compare_op0,
                                 sh_compare_op1)));

  return t_reg;
}

/* Called from the md file, set up the operands of a compare instruction.  */

void
from_compare (operands, code)
     rtx *operands;
     int code;
{
  enum machine_mode mode = GET_MODE (sh_compare_op0);
  rtx insn;
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);
  if (code != EQ
      || mode == DImode
      || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      /* Force args into regs, since we can't use constants here.  */
      sh_compare_op0 = force_reg (mode, sh_compare_op0);
      if (sh_compare_op1 != const0_rtx
          || code == GTU || code == GEU
          || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
        sh_compare_op1 = force_reg (mode, sh_compare_op1);
    }
  if (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
    {
      from_compare (operands, GT);
      insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
    }
  else
    insn = gen_rtx_SET (VOIDmode,
                        gen_rtx_REG (SImode, T_REG),
                        gen_rtx (code, SImode, sh_compare_op0,
                                 sh_compare_op1));
  if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      insn = gen_rtx (PARALLEL, VOIDmode,
                      gen_rtvec (2, insn,
                                 gen_rtx (USE, VOIDmode, get_fpscr_rtx ())));
      (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
    }
  else
    emit_insn (insn);
}
\f
/* Functions to output assembly code.  */

/* Return a sequence of instructions to perform DI or DF move.

   Since the SH cannot move a DI or DF in one instruction, we have
   to take care when we see overlapping source and dest registers.  */

const char *
output_movedouble (insn, operands, mode)
     rtx insn ATTRIBUTE_UNUSED;
     rtx operands[];
     enum machine_mode mode;
{
  rtx dst = operands[0];
  rtx src = operands[1];

  if (GET_CODE (dst) == MEM
      && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
    return "mov.l %T1,%0\n\tmov.l %1,%0";

  if (register_operand (dst, mode)
      && register_operand (src, mode))
    {
      if (REGNO (src) == MACH_REG)
        return "sts mach,%S0\n\tsts macl,%R0";

      /* When mov.d r1,r2 do r2->r3 then r1->r2;
         when mov.d r1,r0 do r1->r0 then r2->r1.  */

      if (REGNO (src) + 1 == REGNO (dst))
        return "mov %T1,%T0\n\tmov %1,%0";
      else
        return "mov %1,%0\n\tmov %T1,%T0";
    }
  else if (GET_CODE (src) == CONST_INT)
    {
      if (INTVAL (src) < 0)
        output_asm_insn ("mov #-1,%S0", operands);
      else
        output_asm_insn ("mov #0,%S0", operands);

      return "mov %1,%R0";
    }
  else if (GET_CODE (src) == MEM)
    {
      int ptrreg = -1;
      int dreg = REGNO (dst);
      rtx inside = XEXP (src, 0);

      if (GET_CODE (inside) == REG)
        ptrreg = REGNO (inside);
      else if (GET_CODE (inside) == SUBREG)
        ptrreg = REGNO (SUBREG_REG (inside)) + SUBREG_WORD (inside);
      else if (GET_CODE (inside) == PLUS)
        {
          ptrreg = REGNO (XEXP (inside, 0));
          /* ??? An r0+REG address shouldn't be possible here, because it isn't
             an offsettable address.  Unfortunately, offsettable addresses use
             QImode to check the offset, and a QImode offsettable address
             requires r0 for the other operand, which is not currently
             supported, so we can't use the 'o' constraint.
             Thus we must check for and handle r0+REG addresses here.
             We punt for now, since this is likely very rare.  */
          if (GET_CODE (XEXP (inside, 1)) == REG)
            abort ();
        }
      else if (GET_CODE (inside) == LABEL_REF)
        return "mov.l %1,%0\n\tmov.l %1+4,%T0";
      else if (GET_CODE (inside) == POST_INC)
        return "mov.l %1,%0\n\tmov.l %1,%T0";
      else
        abort ();

      /* Work out the safe way to copy.  Copy into the second half first.  */
      if (dreg == ptrreg)
        return "mov.l %T1,%T0\n\tmov.l %1,%0";
    }

  return "mov.l %1,%0\n\tmov.l %T1,%T0";
}

/* Print an instruction which would have gone into a delay slot after
   another instruction, but couldn't because the other instruction expanded
   into a sequence where putting the slot insn at the end wouldn't work.  */

static void
print_slot (insn)
     rtx insn;
{
  final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1);

  INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
}

const char *
output_far_jump (insn, op)
     rtx insn;
     rtx op;
{
  struct { rtx lab, reg, op; } this;
  const char *jump;
  int far;
  int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));

  this.lab = gen_label_rtx ();

  if (TARGET_SH2
      && offset >= -32764
      && offset - get_attr_length (insn) <= 32766)
    {
      far = 0;
      jump = "mov.w %O0,%1;braf %1";
    }
  else
    {
      far = 1;
      if (flag_pic)
        jump = "mov.l %O0,%1;braf %1";
      else
        jump = "mov.l %O0,%1;jmp @%1";
    }
  /* If we have a scratch register available, use it.  */
  if (GET_CODE (PREV_INSN (insn)) == INSN
      && INSN_CODE (PREV_INSN (insn)) == CODE_FOR_indirect_jump_scratch)
    {
      this.reg = SET_DEST (PATTERN (PREV_INSN (insn)));
      output_asm_insn (jump, &this.lab);
      if (dbr_sequence_length ())
        print_slot (final_sequence);
      else
        output_asm_insn ("nop", 0);
    }
  else
    {
      /* Output the delay slot insn first if any.  */
      if (dbr_sequence_length ())
        print_slot (final_sequence);

      this.reg = gen_rtx_REG (SImode, 13);
      output_asm_insn ("mov.l r13,@-r15", 0);
      output_asm_insn (jump, &this.lab);
      output_asm_insn ("mov.l @r15+,r13", 0);
    }
  if (far)
    output_asm_insn (".align 2", 0);
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
  this.op = op;
  if (far && flag_pic)
    output_asm_insn (".long %O2-%O0", &this.lab);
  else
    output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
  return "";
}

/* Local label counter, used for constants in the pool and inside
   pattern branches.  */

static int lf = 100;

/* Output code for ordinary branches.  */

const char *
output_branch (logic, insn, operands)
     int logic;
     rtx insn;
     rtx *operands;
{
  switch (get_attr_length (insn))
    {
    case 6:
      /* This can happen if filling the delay slot has caused a forward
         branch to exceed its range (we could reverse it, but only
         when we know we won't overextend other branches; this should
         best be handled by relaxation).
         It can also happen when other condbranches hoist delay slot insns
         from their destinations, leading to an increase in code size.
         But the branch will still be in the range -4092..+4098 bytes.  */

      if (! TARGET_RELAX)
        {
          int label = lf++;
          /* The call to print_slot will clobber the operands.  */
          rtx op0 = operands[0];

          /* If the instruction in the delay slot is annulled (true), then
             there is no delay slot where we can put it now.  The only safe
             place for it is after the label.  final will do that by
             default.  */

          if (final_sequence
              && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
            {
              asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n",
                           logic ? "f" : "t",
                           ASSEMBLER_DIALECT ? "/" : ".", label);
              print_slot (final_sequence);
            }
          else
            asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n",
                         logic ? "f" : "t", label);

          output_asm_insn ("bra\t%l0", &op0);
          fprintf (asm_out_file, "\tnop\n");
          ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "LF", label);

          return "";
        }
      /* When relaxing, handle this like a short branch.  The linker
         will fix it up if it still doesn't fit after relaxation.  */
    case 2:
      return logic ? "bt%.\t%l0" : "bf%.\t%l0";
    default:
      abort ();
    }
}

const char *
output_branchy_insn (code, template, insn, operands)
     enum rtx_code code;
     const char *template;
     rtx insn;
     rtx *operands;
{
  rtx next_insn = NEXT_INSN (insn);

  if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
    {
      rtx src = SET_SRC (PATTERN (next_insn));
      if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
        {
          /* The following branch is not taken.  */
          operands[9] = gen_label_rtx ();
          emit_label_after (operands[9], next_insn);
          INSN_ADDRESSES_NEW (operands[9],
                              INSN_ADDRESSES (INSN_UID (next_insn))
                              + get_attr_length (next_insn));
          return template;
        }
      else
        {
          int offset = (branch_dest (next_insn)
                        - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
          if (offset >= -252 && offset <= 258)
            {
              if (GET_CODE (src) == IF_THEN_ELSE)
                /* branch_true */
                src = XEXP (src, 1);
              operands[9] = src;
              return template;
            }
        }
    }
  operands[9] = gen_label_rtx ();
  emit_label_after (operands[9], insn);
  INSN_ADDRESSES_NEW (operands[9],
                      INSN_ADDRESSES (INSN_UID (insn))
                      + get_attr_length (insn));
  return template;
}

const char *
output_ieee_ccmpeq (insn, operands)
     rtx insn, *operands;
{
  return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
}
\f
/* Output to FILE the start of the assembler file.  */

void
output_file_start (file)
     FILE *file;
{
  output_file_directive (file, main_input_filename);

  /* Switch to the data section so that the coffsem symbol and the
     gcc2_compiled. symbol aren't in the text section.  */
  data_section ();

  if (TARGET_LITTLE_ENDIAN)
    fprintf (file, "\t.little\n");
}
\f
/* Actual number of instructions used to make a shift by N.  */
static char ashiftrt_insns[] =
  { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};

/* Left shift and logical right shift are the same.  */
static char shift_insns[] =
  { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

/* Individual shift amounts needed to get the above length sequences.
   One bit right shifts clobber the T bit, so when possible, put one bit
   shifts in the middle of the sequence, so the ends are eligible for
   branch delay slots.  */
static short shift_amounts[32][5] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};

/* Likewise, but for shift amounts < 16, up to three highmost bits
   might be clobbered.  This is typically used when combined with some
   kind of sign or zero extension.  */

static char ext_shift_insns[] =
  { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

static short ext_shift_amounts[32][4] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};

/* Assuming we have a value that has been sign-extended by at least one
   bit, can we use the ext_shift_amounts with the last shift turned into
   an arithmetic shift, to shift it by N without data loss, and quicker
   than by other means?  */
#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
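
/* An expository note (not used by the code): (n | 8) == 15 holds exactly
   for n == 7 and n == 15, i.e. for the two shift counts whose
   ext_shift_amounts sequences above end in a single one-bit right shift
   ({8, -1} and {16, -1}) - the one shift that can safely be made
   arithmetic when the value is already sign-extended.  A throwaway
   check, kept under #if 0:  */
#if 0
static void
check_ext_shift_signed ()
{
  int n;
  for (n = 0; n < 32; n++)
    if (EXT_SHIFT_SIGNED (n) != (n == 7 || n == 15))
      abort ();
}
#endif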

/* This is used in length attributes in sh.md to help compute the length
   of arbitrary constant shift instructions.  */

int
shift_insns_rtx (insn)
     rtx insn;
{
  rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  int shift_count = INTVAL (XEXP (set_src, 1));
  enum rtx_code shift_code = GET_CODE (set_src);

  switch (shift_code)
    {
    case ASHIFTRT:
      return ashiftrt_insns[shift_count];
    case LSHIFTRT:
    case ASHIFT:
      return shift_insns[shift_count];
    default:
      abort ();
    }
}

/* Return the cost of a shift.  */

int
shiftcosts (x)
     rtx x;
{
  int value;

  /* A shift by a non-constant will be expensive.  */
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
    return SH_DYNAMIC_SHIFT_COST;

  value = INTVAL (XEXP (x, 1));

  /* Otherwise, return the true cost in instructions.  */
  if (GET_CODE (x) == ASHIFTRT)
    {
      int cost = ashiftrt_insns[value];
      /* If SH3, then we put the constant in a reg and use shad.  */
      if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
        cost = 1 + SH_DYNAMIC_SHIFT_COST;
      return cost;
    }
  else
    return shift_insns[value];
}

/* Return the cost of an AND operation.  */

int
andcosts (x)
     rtx x;
{
  int i;

  /* Anding with a register is a single-cycle and instruction.  */
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
    return 1;

  i = INTVAL (XEXP (x, 1));
  /* These constants are single-cycle extu.[bw] instructions.  */
  if (i == 0xff || i == 0xffff)
    return 1;
  /* A constant that can be used in an and immediate instruction takes a
     single cycle, but this requires r0, so make it a little more
     expensive.  */
  if (CONST_OK_FOR_L (i))
    return 2;
  /* A constant that can be loaded with a mov immediate and an and.
     This case is probably unnecessary.  */
  if (CONST_OK_FOR_I (i))
    return 2;
  /* Any other constant requires a 2 cycle pc-relative load plus an and.
     This case is probably unnecessary.  */
  return 3;
}
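
/* For illustration only (kept under #if 0): the costs andcosts assigns
   to a few representative masks, assuming the usual SH definitions of
   CONST_OK_FOR_L (8 bit unsigned) and CONST_OK_FOR_I (8 bit signed).  */
#if 0
  (and:SI (reg) (reg))               -- cost 1: plain and
  (and:SI (reg) (const_int 0xff))    -- cost 1: extu.b
  (and:SI (reg) (const_int 0xffff))  -- cost 1: extu.w
  (and:SI (reg) (const_int 0x7f))    -- cost 2: and #imm,r0
  (and:SI (reg) (const_int -32))     -- cost 2: mov #imm plus and
  (and:SI (reg) (const_int 0x12345)) -- cost 3: pc-relative load plus and
#endif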

/* Return the cost of an addition or a subtraction.  */

int
addsubcosts (x)
     rtx x;
{
  /* Adding a register is a single cycle insn.  */
  if (GET_CODE (XEXP (x, 1)) == REG
      || GET_CODE (XEXP (x, 1)) == SUBREG)
    return 1;

  /* Likewise for small constants.  */
  if (GET_CODE (XEXP (x, 1)) == CONST_INT
      && CONST_OK_FOR_I (INTVAL (XEXP (x, 1))))
    return 1;

  /* Any other constant requires a 2 cycle pc-relative load plus an
     addition.  */
  return 3;
}

/* Return the cost of a multiply.  */
int
multcosts (x)
     rtx x ATTRIBUTE_UNUSED;
{
  if (TARGET_SH2)
    {
      /* We have a mul insn, so we can never take more than the mul and the
         read of the mac reg, but count more because of the latency and extra
         reg usage.  */
      if (TARGET_SMALLCODE)
        return 2;
      return 3;
    }

  /* If we're aiming at small code, then just count the number of
     insns in a multiply call sequence.  */
  if (TARGET_SMALLCODE)
    return 5;

  /* Otherwise count all the insns in the routine we'd be calling, too.  */
  return 20;
}

/* Code to expand a shift.  */

void
gen_ashift (type, n, reg)
     int type;
     int n;
     rtx reg;
{
  /* Negative values here come from the shift_amounts array.  */
  if (n < 0)
    {
      if (type == ASHIFT)
        type = LSHIFTRT;
      else
        type = ASHIFT;
      n = -n;
    }

  switch (type)
    {
    case ASHIFTRT:
      emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
      break;
    case LSHIFTRT:
      if (n == 1)
        emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
      else
        emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
      break;
    case ASHIFT:
      emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
      break;
    }
}

/* Same for HImode.  */

void
gen_ashift_hi (type, n, reg)
     int type;
     int n;
     rtx reg;
{
  /* Negative values here come from the shift_amounts array.  */
  if (n < 0)
    {
      if (type == ASHIFT)
        type = LSHIFTRT;
      else
        type = ASHIFT;
      n = -n;
    }

  switch (type)
    {
    case ASHIFTRT:
    case LSHIFTRT:
      /* We don't have HImode right shift operations because using the
         ordinary 32 bit shift instructions for that doesn't generate proper
         zero/sign extension.
         gen_ashift_hi is only called in contexts where we know that the
         sign extension works out correctly.  */
      {
        int word = 0;
        if (GET_CODE (reg) == SUBREG)
          {
            word = SUBREG_WORD (reg);
            reg = SUBREG_REG (reg);
          }
        gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, word));
        break;
      }
    case ASHIFT:
      emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
      break;
    }
}

/* Output RTL to split a constant shift into its component SH constant
   shift instructions.  */

void
gen_shifty_op (code, operands)
     int code;
     rtx *operands;
{
  int value = INTVAL (operands[2]);
  int max, i;

  /* Truncate the shift count in case it is out of bounds.  */
  value = value & 0x1f;

  if (value == 31)
    {
      if (code == LSHIFTRT)
        {
          emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
          emit_insn (gen_movt (operands[0]));
          return;
        }
      else if (code == ASHIFT)
        {
          /* There is a two instruction sequence for 31 bit left shifts,
             but it requires r0.  */
          if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
            {
              emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
              emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
              return;
            }
        }
    }
  else if (value == 0)
    {
      /* This can happen when not optimizing.  We must output something here
         to prevent the compiler from aborting in final.c after the try_split
         call.  */
      emit_insn (gen_nop ());
      return;
    }

  max = shift_insns[value];
  for (i = 0; i < max; i++)
    gen_ashift (code, shift_amounts[value][i], operands[0]);
}
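
/* By way of illustration (an expository note, not part of the
   interface): for a left shift by 13, shift_insns[13] is 4 and
   shift_amounts[13] is {8, 2, 1, 2}, so gen_shifty_op emits, e.g. for
   r4:

        shll8   r4
        shll2   r4
        shll    r4
        shll2   r4

   i.e. 8 + 2 + 1 + 2 = 13, with the one-bit shift (which clobbers the
   T bit, per the comment on shift_amounts above) kept away from the
   ends of the sequence, where a branch delay slot might want one of
   the insns.  */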

/* Same as above, but optimized for values where the topmost bits don't
   matter.  */

void
gen_shifty_hi_op (code, operands)
     int code;
     rtx *operands;
{
  int value = INTVAL (operands[2]);
  int max, i;
  void (*gen_fun) PARAMS ((int, int, rtx));

  /* This operation is used by and_shl for SImode values with a few
     high bits known to be cleared.  */
  value &= 31;
  if (value == 0)
    {
      emit_insn (gen_nop ());
      return;
    }

  gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
  if (code == ASHIFT)
    {
      max = ext_shift_insns[value];
      for (i = 0; i < max; i++)
        gen_fun (code, ext_shift_amounts[value][i], operands[0]);
    }
  else
    /* When shifting right, emit the shifts in reverse order, so that
       solitary negative values come first.  */
    for (i = ext_shift_insns[value] - 1; i >= 0; i--)
      gen_fun (code, ext_shift_amounts[value][i], operands[0]);
}

/* Output RTL for an arithmetic right shift.  */

/* ??? Rewrite to use super-optimizer sequences.  */

int
expand_ashiftrt (operands)
     rtx *operands;
{
  rtx sym;
  rtx wrk;
  char func[18];
  tree func_name;
  int value;

  if (TARGET_SH3)
    {
      if (GET_CODE (operands[2]) != CONST_INT)
        {
          rtx count = copy_to_mode_reg (SImode, operands[2]);
          emit_insn (gen_negsi2 (count, count));
          emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
          return 1;
        }
      else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
               > 1 + SH_DYNAMIC_SHIFT_COST)
        {
          rtx count
            = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
          emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
          return 1;
        }
    }
  if (GET_CODE (operands[2]) != CONST_INT)
    return 0;

  value = INTVAL (operands[2]) & 31;

  if (value == 31)
    {
      emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
      return 1;
    }
  else if (value >= 16 && value <= 19)
    {
      wrk = gen_reg_rtx (SImode);
      emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
      value -= 16;
      while (value--)
        gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);
      return 1;
    }
  /* Expand a short sequence inline; for a longer one, call a magic
     routine.  */
  else if (value <= 5)
    {
      wrk = gen_reg_rtx (SImode);
      emit_move_insn (wrk, operands[1]);
      while (value--)
        gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);
      return 1;
    }

  wrk = gen_reg_rtx (Pmode);

  /* Load the value into an arg reg and call a helper.  */
  emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
  sprintf (func, "__ashiftrt_r4_%d", value);
  func_name = get_identifier (func);
  sym = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (func_name));
  emit_move_insn (wrk, sym);
  emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
  emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
  return 1;
}
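
/* To summarize the dispatch above with concrete counts (an expository
   note only): on non-SH3 targets an arithmetic right shift by 31 uses
   the dedicated ashrsi2_31 pattern, counts 16..19 use ashrsi2_16 plus
   up to three single-bit shifts, counts 1..5 are expanded inline as
   single-bit shifts, and anything else (say 24) is routed through the
   __ashiftrt_r4_24 library helper with the operand passed in r4.  */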

int
sh_dynamicalize_shift_p (count)
     rtx count;
{
  return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
}

/* Try to find a good way to implement the combiner pattern
  [(set (match_operand:SI 0 "register_operand" "r")
        (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
                           (match_operand:SI 2 "const_int_operand" "n"))
                (match_operand:SI 3 "const_int_operand" "n"))) .
  LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
  return 0 for simple right / left or left / right shift combination.
  return 1 for a combination of shifts with zero_extend.
  return 2 for a combination of shifts with an AND that needs r0.
  return 3 for a combination of shifts with an AND that needs an extra
    scratch register, when the three highmost bits of the AND mask are clear.
  return 4 for a combination of shifts with an AND that needs an extra
    scratch register, when any of the three highmost bits of the AND mask
    is set.
  If ATTRP is set, store an initial right shift width in ATTRP[0],
  and the instruction length in ATTRP[1].  These values are not valid
  when returning 0.
  When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
  shift_amounts for the last shift value that is to be used before the
  sign extend.  */
int
shl_and_kind (left_rtx, mask_rtx, attrp)
     rtx left_rtx, mask_rtx;
     int *attrp;
{
  unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
  int left = INTVAL (left_rtx), right;
  int best = 0;
  int cost, best_cost = 10000;
  int best_right = 0, best_len = 0;
  int i;
  int can_ext;

  if (left < 0 || left > 31)
    return 0;
  if (GET_CODE (mask_rtx) == CONST_INT)
    mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
  else
    mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
  /* Can this be expressed as a right shift / left shift pair?  */
  lsb = ((mask ^ (mask - 1)) >> 1) + 1;
  right = exact_log2 (lsb);
  mask2 = ~(mask + lsb - 1);
  lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
  /* mask has no zeroes except trailing zeroes <==> ! mask2 */
  if (! mask2)
    best_cost = shift_insns[right] + shift_insns[right + left];
  /* mask has no trailing zeroes <==> ! right */
  else if (! right && mask2 == ~(lsb2 - 1))
    {
      int late_right = exact_log2 (lsb2);
      best_cost = shift_insns[left + late_right] + shift_insns[late_right];
    }
  /* Try to use zero extend.  */
  if (mask2 == ~(lsb2 - 1))
    {
      int width, first;

      for (width = 8; width <= 16; width += 8)
        {
          /* Can we zero-extend right away?  */
          if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
            {
              cost
                = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
              if (cost < best_cost)
                {
                  best = 1;
                  best_cost = cost;
                  best_right = right;
                  best_len = cost;
                  if (attrp)
                    attrp[2] = -1;
                }
              continue;
            }
          /* ??? Could try to put zero extend into initial right shift,
             or even shift a bit left before the right shift.  */
          /* Determine value of first part of left shift, to get to the
             zero extend cut-off point.  */
          first = width - exact_log2 (lsb2) + right;
          if (first >= 0 && right + left - first >= 0)
            {
              cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
                + ext_shift_insns[right + left - first];
              if (cost < best_cost)
                {
                  best = 1;
                  best_cost = cost;
                  best_right = right;
                  best_len = cost;
                  if (attrp)
                    attrp[2] = first;
                }
            }
        }
    }
  /* Try to use r0 AND pattern.  */
  for (i = 0; i <= 2; i++)
    {
      if (i > right)
        break;
      if (! CONST_OK_FOR_L (mask >> i))
        continue;
      cost = (i != 0) + 2 + ext_shift_insns[left + i];
      if (cost < best_cost)
        {
          best = 2;
          best_cost = cost;
          best_right = i;
          best_len = cost - 1;
        }
    }
  /* Try to use a scratch register to hold the AND operand.  */
  can_ext = ((mask << left) & 0xe0000000) == 0;
  for (i = 0; i <= 2; i++)
    {
      if (i > right)
        break;
      cost = (i != 0) + (CONST_OK_FOR_I (mask >> i) ? 2 : 3)
        + (can_ext ? ext_shift_insns : shift_insns)[left + i];
      if (cost < best_cost)
        {
          best = 4 - can_ext;
          best_cost = cost;
          best_right = i;
          best_len = cost - 1 - ! CONST_OK_FOR_I (mask >> i);
        }
    }

  if (attrp)
    {
      attrp[0] = best_right;
      attrp[1] = best_len;
    }
  return best;
}
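
/* The bit twiddling above deserves a worked example (expository only).
   For mask == 0x1f8 (after the >> left): mask - 1 == 0x1f7, so
   mask ^ (mask - 1) == 0xf, and ((0xf >> 1) + 1) == 0x8, the lowest set
   bit, giving right == 3.  Then mask + lsb - 1 == 0x1ff fills in the
   low bits, so mask2 == ~0x1ff is the set of bits above the mask, and
   the same trick applied to mask2 yields lsb2 == 0x200, i.e. the first
   zero bit above the contiguous run.  A mask is a single contiguous
   run of ones exactly when mask2 == ~(lsb2 - 1).  */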

/* This is used in length attributes of the unnamed instructions
   corresponding to shl_and_kind return values of 1 and 2.  */
int
shl_and_length (insn)
     rtx insn;
{
  rtx set_src, left_rtx, mask_rtx;
  int attributes[3];

  set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  left_rtx = XEXP (XEXP (set_src, 0), 1);
  mask_rtx = XEXP (set_src, 1);
  shl_and_kind (left_rtx, mask_rtx, attributes);
  return attributes[1];
}

/* This is used in the length attribute of the and_shl_scratch
   instruction.  */

int
shl_and_scr_length (insn)
     rtx insn;
{
  rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  int len = shift_insns[INTVAL (XEXP (set_src, 1))];
  rtx op = XEXP (set_src, 0);
  len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
  op = XEXP (XEXP (op, 0), 0);
  return len + shift_insns[INTVAL (XEXP (op, 1))];
}

/* Generating rtl?  */
extern int rtx_equal_function_value_matters;

/* Generate rtl for instructions for which shl_and_kind advised a particular
   method of generating them, i.e. returned nonzero.  */

int
gen_shl_and (dest, left_rtx, mask_rtx, source)
     rtx dest, left_rtx, mask_rtx, source;
{
  int attributes[3];
  unsigned HOST_WIDE_INT mask;
  int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
  int right, total_shift;
  void (*shift_gen_fun) PARAMS ((int, rtx *)) = gen_shifty_hi_op;

  right = attributes[0];
  total_shift = INTVAL (left_rtx) + right;
  mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
  switch (kind)
    {
    default:
      return -1;
    case 1:
      {
        int first = attributes[2];
        rtx operands[3];

        if (first < 0)
          {
            emit_insn ((mask << right) <= 0xff
                       ? gen_zero_extendqisi2 (dest,
                                               gen_lowpart (QImode, source))
                       : gen_zero_extendhisi2 (dest,
                                               gen_lowpart (HImode, source)));
            source = dest;
          }
        if (source != dest)
          emit_insn (gen_movsi (dest, source));
        operands[0] = dest;
        if (right)
          {
            operands[2] = GEN_INT (right);
            gen_shifty_hi_op (LSHIFTRT, operands);
          }
        if (first > 0)
          {
            operands[2] = GEN_INT (first);
            gen_shifty_hi_op (ASHIFT, operands);
            total_shift -= first;
            mask <<= first;
          }
        if (first >= 0)
          emit_insn (mask <= 0xff
                     ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
                     : gen_zero_extendhisi2 (dest,
                                             gen_lowpart (HImode, dest)));
        if (total_shift > 0)
          {
            operands[2] = GEN_INT (total_shift);
            gen_shifty_hi_op (ASHIFT, operands);
          }
        break;
      }
    case 4:
      shift_gen_fun = gen_shifty_op;
      /* Fall through.  */
    case 3:
      /* If the topmost bit that matters is set, set the topmost bits
         that don't matter.  This way, we might be able to get a shorter
         signed constant.  */
      if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
        mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
      /* Fall through.  */
    case 2:
      /* Don't expand fine-grained when combining, because that will
         make the pattern fail.  */
      if (rtx_equal_function_value_matters
          || reload_in_progress || reload_completed)
        {
          rtx operands[3];

          /* Cases 3 and 4 should be handled by this split
             only while combining.  */
          if (kind > 2)
            abort ();
          if (right)
            {
              emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
              source = dest;
            }
          emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
          if (total_shift)
            {
              operands[0] = dest;
              operands[1] = dest;
              operands[2] = GEN_INT (total_shift);
              shift_gen_fun (ASHIFT, operands);
            }
          break;
        }
      else
        {
          int neg = 0;
          if (kind != 4 && total_shift < 16)
            {
              neg = -ext_shift_amounts[total_shift][1];
              if (neg > 0)
                neg -= ext_shift_amounts[total_shift][2];
              else
                neg = 0;
            }
          emit_insn (gen_and_shl_scratch (dest, source,
                                          GEN_INT (right),
                                          GEN_INT (mask),
                                          GEN_INT (total_shift + neg),
                                          GEN_INT (neg)));
          emit_insn (gen_movsi (dest, dest));
          break;
        }
    }
  return 0;
}

/* Try to find a good way to implement the combiner pattern
  [(set (match_operand:SI 0 "register_operand" "=r")
        (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
                                    (match_operand:SI 2 "const_int_operand" "n")
                         (match_operand:SI 3 "const_int_operand" "n")
                         (const_int 0)))
   (clobber (reg:SI T_REG))]
  LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
  return 0 for simple left / right shift combination.
  return 1 for left shift / 8 bit sign extend / left shift.
  return 2 for left shift / 16 bit sign extend / left shift.
  return 3 for left shift / 8 bit sign extend / shift / sign extend.
  return 4 for left shift / 16 bit sign extend / shift / sign extend.
  return 5 for left shift / 16 bit sign extend / right shift.
  return 6 for < 8 bit sign extend / left shift.
  return 7 for < 8 bit sign extend / left shift / single right shift.
  If COSTP is nonzero, assign the calculated cost to *COSTP.  */

int
shl_sext_kind (left_rtx, size_rtx, costp)
     rtx left_rtx, size_rtx;
     int *costp;
{
  int left, size, insize, ext;
  int cost, best_cost;
  int kind;

  left = INTVAL (left_rtx);
  size = INTVAL (size_rtx);
  insize = size - left;
  if (insize <= 0)
    abort ();
  /* Default to left / right shift.  */
  kind = 0;
  best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
  if (size <= 16)
    {
      /* 16 bit shift / sign extend / 16 bit shift */
      cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
      /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
         below, by alternative 3 or something even better.  */
      if (cost < best_cost)
        {
          kind = 5;
          best_cost = cost;
        }
    }
  /* Try a plain sign extend between two shifts.  */
  for (ext = 16; ext >= insize; ext -= 8)
    {
      if (ext <= size)
        {
          cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
          if (cost < best_cost)
            {
              kind = ext / 8U;
              best_cost = cost;
            }
        }
      /* Check if we can do a sloppy shift with a final signed shift
         restoring the sign.  */
      if (EXT_SHIFT_SIGNED (size - ext))
        cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
      /* If not, maybe it's still cheaper to do the second shift sloppy,
         and do a final sign extend?  */
      else if (size <= 16)
        cost = ext_shift_insns[ext - insize] + 1
          + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
      else
        continue;
      if (cost < best_cost)
        {
          kind = ext / 8U + 2;
          best_cost = cost;
        }
    }
  /* Check if we can sign extend in r0.  */
  if (insize < 8)
    {
      cost = 3 + shift_insns[left];
      if (cost < best_cost)
        {
          kind = 6;
          best_cost = cost;
        }
      /* Try the same with a final signed shift.  */
      if (left < 31)
        {
          cost = 3 + ext_shift_insns[left + 1] + 1;
          if (cost < best_cost)
            {
              kind = 7;
              best_cost = cost;
            }
        }
    }
  if (TARGET_SH3)
    {
      /* Try to use a dynamic shift.  */
      cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
      if (cost < best_cost)
        {
          kind = 0;
          best_cost = cost;
        }
    }
  if (costp)
    *costp = cost;
  return kind;
}

/* Function to be used in the length attribute of the instructions
   implementing this pattern.  */

int
shl_sext_length (insn)
     rtx insn;
{
  rtx set_src, left_rtx, size_rtx;
  int cost;

  set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  left_rtx = XEXP (XEXP (set_src, 0), 1);
  size_rtx = XEXP (set_src, 1);
  shl_sext_kind (left_rtx, size_rtx, &cost);
  return cost;
}

/* Generate rtl for this pattern.  */

int
gen_shl_sext (dest, left_rtx, size_rtx, source)
     rtx dest, left_rtx, size_rtx, source;
{
  int kind;
  int left, size, insize, cost;
  rtx operands[3];

  kind = shl_sext_kind (left_rtx, size_rtx, &cost);
  left = INTVAL (left_rtx);
  size = INTVAL (size_rtx);
  insize = size - left;
  switch (kind)
    {
    case 1:
    case 2:
    case 3:
    case 4:
      {
        int ext = kind & 1 ? 8 : 16;
        int shift2 = size - ext;

        /* Don't expand fine-grained when combining, because that will
           make the pattern fail.  */
        if (! rtx_equal_function_value_matters
            && ! reload_in_progress && ! reload_completed)
          {
            emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
            emit_insn (gen_movsi (dest, source));
            break;
          }
        if (dest != source)
          emit_insn (gen_movsi (dest, source));
        operands[0] = dest;
        if (ext - insize)
          {
            operands[2] = GEN_INT (ext - insize);
            gen_shifty_hi_op (ASHIFT, operands);
          }
        emit_insn (kind & 1
                   ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
                   : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
        if (kind <= 2)
          {
            if (shift2)
              {
                operands[2] = GEN_INT (shift2);
                gen_shifty_op (ASHIFT, operands);
              }
          }
        else
          {
            if (shift2 > 0)
              {
                if (EXT_SHIFT_SIGNED (shift2))
                  {
                    operands[2] = GEN_INT (shift2 + 1);
                    gen_shifty_op (ASHIFT, operands);
                    operands[2] = GEN_INT (1);
                    gen_shifty_op (ASHIFTRT, operands);
                    break;
                  }
                operands[2] = GEN_INT (shift2);
                gen_shifty_hi_op (ASHIFT, operands);
              }
            else if (shift2)
              {
                operands[2] = GEN_INT (-shift2);
                gen_shifty_hi_op (LSHIFTRT, operands);
              }
            emit_insn (size <= 8
                       ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
                       : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
          }
        break;
      }
    case 5:
      {
        int i = 16 - size;
        if (! rtx_equal_function_value_matters
            && ! reload_in_progress && ! reload_completed)
          emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
        else
          {
            operands[0] = dest;
            operands[2] = GEN_INT (16 - insize);
            gen_shifty_hi_op (ASHIFT, operands);
            emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
          }
        /* Don't use gen_ashrsi3 because it generates new pseudos.  */
        while (--i >= 0)
          gen_ashift (ASHIFTRT, 1, dest);
        break;
      }
    case 6:
    case 7:
      /* Don't expand fine-grained when combining, because that will
         make the pattern fail.  */
      if (! rtx_equal_function_value_matters
          && ! reload_in_progress && ! reload_completed)
        {
          emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
          emit_insn (gen_movsi (dest, source));
          break;
        }
      emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
      emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
      emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
      operands[0] = dest;
      operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
      gen_shifty_op (ASHIFT, operands);
      if (kind == 7)
        emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1)));
      break;
    default:
      return -1;
    }
  return 0;
}
\f
/* The SH cannot load a large constant into a register; constants have to
   come from a pc relative load.  The reference of a pc relative load
   instruction must be less than 1k in front of the instruction.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow things
   down and make things bigger.

   Worst case code looks like:

   mov.l L1,rn
   bra   L2
   nop
   align
   L1:   .long value
   L2:
   ..

   mov.l L3,rn
   bra   L4
   nop
   align
   L3:   .long value
   L4:
   ..

   We fix this by performing a scan before scheduling, which notices which
   instructions need to have their operands fetched from the constant table
   and builds the table.

   The algorithm is:

   scan, find an instruction which needs a pcrel move.  Look forward, find
   the last barrier which is within MAX_COUNT bytes of the requirement.
   If there isn't one, make one.  Process all the instructions between
   the found instruction and the barrier.

   In the above example, we can tell that L3 is within 1k of L1, so
   the first move can be shrunk from the 3 insn+constant sequence into
   just 1 insn, and the constant moved to L3 to make:

   mov.l L1,rn
   ..
   mov.l L3,rn
   bra   L4
   nop
   align
   L3:.long value
   L4:.long value

   Then the second move becomes the target for the shortening process.  */
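
/* In outline, and glossing over many details handled by the real reorg
   pass (this is a sketch only; the actual work is done by the machinery
   that uses find_barrier, broken_move, add_constant and dump_table
   below):

        for each insn:
          if broken_move (insn):
            barrier = find_barrier (...);   -- last barrier in range
            for each insn up to barrier:
              if broken_move (insn):
                add_constant (...);         -- collect into pool_vector
            dump_table (barrier);           -- emit the literal table
 */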

typedef struct
{
  rtx value;                    /* Value in table.  */
  rtx label;                    /* Label of value.  */
  enum machine_mode mode;       /* Mode of value.  */
} pool_node;

/* The maximum number of constants that can fit into one pool, since
   the pc relative range is 0...1020 bytes and constants are at least 4
   bytes long.  */

#define MAX_POOL_SIZE (1020/4)
static pool_node pool_vector[MAX_POOL_SIZE];
static int pool_size;

/* ??? If we need a constant in HImode which is the truncated value of a
   constant we need in SImode, we could combine the two entries thus saving
   two bytes.  Is this common enough to be worth the effort of implementing
   it?  */

/* ??? This stuff should be done at the same time that we shorten branches.
   As it is now, we must assume that all branches are the maximum size, and
   this causes us to almost always output constant pools sooner than
   necessary.  */

/* Add a constant to the pool and return its label.  */

static rtx
add_constant (x, mode, last_value)
     rtx x;
     enum machine_mode mode;
     rtx last_value;
{
  int i;
  rtx lab;

  /* First see if we've already got it.  */
  for (i = 0; i < pool_size; i++)
    {
      if (x->code == pool_vector[i].value->code
          && mode == pool_vector[i].mode)
        {
          if (x->code == CODE_LABEL)
            {
              if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
                continue;
            }
          if (rtx_equal_p (x, pool_vector[i].value))
            {
              lab = 0;
              if (! last_value
                  || ! i
                  || ! rtx_equal_p (last_value, pool_vector[i-1].value))
                {
                  lab = pool_vector[i].label;
                  if (! lab)
                    pool_vector[i].label = lab = gen_label_rtx ();
                }
              return lab;
            }
        }
    }

  /* Need a new one.  */
  pool_vector[pool_size].value = x;
  if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
    lab = 0;
  else
    lab = gen_label_rtx ();
  pool_vector[pool_size].mode = mode;
  pool_vector[pool_size].label = lab;
  pool_size++;
  return lab;
}
1973
1974 /* Output the literal table. */
1975
1976 static void
1977 dump_table (scan)
1978 rtx scan;
1979 {
1980 int i;
1981 int need_align = 1;
1982
1983 /* Do two passes, first time dump out the HI sized constants. */
1984
1985 for (i = 0; i < pool_size; i++)
1986 {
1987 pool_node *p = &pool_vector[i];
1988
1989 if (p->mode == HImode)
1990 {
1991 if (need_align)
1992 {
1993 scan = emit_insn_after (gen_align_2 (), scan);
1994 need_align = 0;
1995 }
1996 scan = emit_label_after (p->label, scan);
1997 scan = emit_insn_after (gen_consttable_2 (p->value), scan);
1998 }
1999 }
2000
2001 need_align = 1;
2002
2003 for (i = 0; i < pool_size; i++)
2004 {
2005 pool_node *p = &pool_vector[i];
2006
2007 switch (p->mode)
2008 {
2009 case HImode:
2010 break;
2011 case SImode:
2012 case SFmode:
2013 if (need_align)
2014 {
2015 need_align = 0;
2016 scan = emit_label_after (gen_label_rtx (), scan);
2017 scan = emit_insn_after (gen_align_4 (), scan);
2018 }
2019 if (p->label)
2020 scan = emit_label_after (p->label, scan);
2021 scan = emit_insn_after (gen_consttable_4 (p->value), scan);
2022 break;
2023 case DFmode:
2024 case DImode:
2025 if (need_align)
2026 {
2027 need_align = 0;
2028 scan = emit_label_after (gen_label_rtx (), scan);
2029 scan = emit_insn_after (gen_align_4 (), scan);
2030 }
2031 if (p->label)
2032 scan = emit_label_after (p->label, scan);
2033 scan = emit_insn_after (gen_consttable_8 (p->value), scan);
2034 break;
2035 default:
2036 abort ();
2037 break;
2038 }
2039 }
2040
2041 scan = emit_insn_after (gen_consttable_end (), scan);
2042 scan = emit_barrier_after (scan);
2043 pool_size = 0;
2044 }
2045
2046 /* Return non-zero if the constant would be an ok source for a
2047 mov.w instead of a mov.l. */
2048
2049 static int
2050 hi_const (src)
2051 rtx src;
2052 {
2053 return (GET_CODE (src) == CONST_INT
2054 && INTVAL (src) >= -32768
2055 && INTVAL (src) <= 32767);
2056 }
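
/* For example, GEN_INT (1000) satisfies hi_const, while GEN_INT (0x12345)
   (74565 > 32767) does not, and must stay a mov.l pc-relative load. */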
2057
2058 /* Non-zero if the insn is a move instruction which needs to be fixed. */
2059
2060 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
2061 CONST_DOUBLE input value is CONST_OK_FOR_I. For an SFmode move, we don't
2062 need to fix it if the input value is CONST_OK_FOR_I. */
2063
2064 static int
2065 broken_move (insn)
2066 rtx insn;
2067 {
2068 if (GET_CODE (insn) == INSN)
2069 {
2070 rtx pat = PATTERN (insn);
2071 if (GET_CODE (pat) == PARALLEL)
2072 pat = XVECEXP (pat, 0, 0);
2073 if (GET_CODE (pat) == SET
2074 /* We can load any 8 bit value if we don't care what the high
2075 order bits end up as. */
2076 && GET_MODE (SET_DEST (pat)) != QImode
2077 && (CONSTANT_P (SET_SRC (pat))
2078 /* Match mova_const. */
2079 || (GET_CODE (SET_SRC (pat)) == UNSPEC
2080 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
2081 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
2082 && ! (TARGET_SH3E
2083 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
2084 && (fp_zero_operand (SET_SRC (pat))
2085 || fp_one_operand (SET_SRC (pat)))
2086 /* ??? If this is a -m4 or -m4-single compilation, we don't
2087 know the current setting of fpscr, so disable fldi. */
2088 && (! TARGET_SH4 || TARGET_FMOVD)
2089 && GET_CODE (SET_DEST (pat)) == REG
2090 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
2091 && (GET_CODE (SET_SRC (pat)) != CONST_INT
2092 || ! CONST_OK_FOR_I (INTVAL (SET_SRC (pat)))))
2093 return 1;
2094 }
2095
2096 return 0;
2097 }
2098
2099 static int
2100 mova_p (insn)
2101 rtx insn;
2102 {
2103 return (GET_CODE (insn) == INSN
2104 && GET_CODE (PATTERN (insn)) == SET
2105 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
2106 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
2107 /* Don't match mova_const. */
2108 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
2109 }
2110
2111 /* Find the last barrier from insn FROM which is close enough to hold the
2112 constant pool. If we can't find one, then create one near the end of
2113 the range. */
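
/* (Its caller, machine_dependent_reorg below, then dumps the accumulated
   constant pool right after the barrier returned from here, via
   dump_table.) */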
2114
2115 static rtx
2116 find_barrier (num_mova, mova, from)
2117 int num_mova;
2118 rtx mova, from;
2119 {
2120 int count_si = 0;
2121 int count_hi = 0;
2122 int found_hi = 0;
2123 int found_si = 0;
2124 int hi_align = 2;
2125 int si_align = 2;
2126 int leading_mova = num_mova;
2127 rtx barrier_before_mova, found_barrier = 0, good_barrier = 0;
2128 int si_limit;
2129 int hi_limit;
2130
2131 /* For HImode: range is 510, add 4 because pc counts from address of
2132 second instruction after this one, subtract 2 for the jump instruction
2133 that we may need to emit before the table, subtract 2 for the instruction
2134 that fills the jump delay slot (in very rare cases, reorg will take an
2135 instruction from after the constant pool or will leave the delay slot
2136 empty). This gives 510.
2137 For SImode: range is 1020, add 4 because pc counts from address of
2138 second instruction after this one, subtract 2 in case pc is 2 byte
2139 aligned, subtract 2 for the jump instruction that we may need to emit
2140 before the table, subtract 2 for the instruction that fills the jump
2141 delay slot. This gives 1018. */
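
/* Restating the arithmetic above explicitly:
   hi_limit = 510 + 4 - 2 - 2 = 510
   si_limit = 1020 + 4 - 2 - 2 - 2 = 1018 */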
2142
2143 /* The branch will always be shortened now that the reference address for
2144 forward branches is the successor address, thus we need no longer make
2145 adjustments to the [sh]i_limit for -O0. */
2146
2147 si_limit = 1018;
2148 hi_limit = 510;
2149
2150 while (from && count_si < si_limit && count_hi < hi_limit)
2151 {
2152 int inc = get_attr_length (from);
2153 int new_align = 1;
2154
2155 if (GET_CODE (from) == CODE_LABEL)
2156 {
2157 if (optimize)
2158 new_align = 1 << label_to_alignment (from);
2159 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
2160 new_align = 1 << barrier_align (from);
2161 else
2162 new_align = 1;
2163 inc = 0;
2164 }
2165
2166 if (GET_CODE (from) == BARRIER)
2167 {
2168
2169 found_barrier = from;
2170
2171 /* If we are at the end of the function, or in front of an alignment
2172 instruction, we need not insert an extra alignment. We prefer
2173 this kind of barrier. */
2174 if (barrier_align (from) > 2)
2175 good_barrier = from;
2176 }
2177
2178 if (broken_move (from))
2179 {
2180 rtx pat, src, dst;
2181 enum machine_mode mode;
2182
2183 pat = PATTERN (from);
2184 if (GET_CODE (pat) == PARALLEL)
2185 pat = XVECEXP (pat, 0, 0);
2186 src = SET_SRC (pat);
2187 dst = SET_DEST (pat);
2188 mode = GET_MODE (dst);
2189
2190 /* We must explicitly check the mode, because sometimes the
2191 front end will generate code to load unsigned constants into
2192 HImode targets without properly sign extending them. */
2193 if (mode == HImode
2194 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
2195 {
2196 found_hi += 2;
2197 /* We put the short constants before the long constants, so
2198 we must count the length of short constants in the range
2199 for the long constants. */
2200 /* ??? This isn't optimal, but is easy to do. */
2201 si_limit -= 2;
2202 }
2203 else
2204 {
2205 while (si_align > 2 && found_si + si_align - 2 > count_si)
2206 si_align >>= 1;
2207 if (found_si > count_si)
2208 count_si = found_si;
2209 found_si += GET_MODE_SIZE (mode);
2210 if (num_mova)
2211 si_limit -= GET_MODE_SIZE (mode);
2212 }
2213
2214 /* See the code in machine_dependent_reorg, which has a similar if
2215 statement that generates a new mova insn in many cases. */
2216 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
2217 inc += 2;
2218 }
2219
2220 if (mova_p (from))
2221 {
2222 if (! num_mova++)
2223 {
2224 leading_mova = 0;
2225 mova = from;
2226 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
2227 }
2228 if (found_si > count_si)
2229 count_si = found_si;
2230 }
2231 else if (GET_CODE (from) == JUMP_INSN
2232 && (GET_CODE (PATTERN (from)) == ADDR_VEC
2233 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
2234 {
2235 if (num_mova)
2236 num_mova--;
2237 if (barrier_align (next_real_insn (from)) == CACHE_LOG)
2238 {
2239 /* We have just passed the barrier in front of the
2240 ADDR_DIFF_VEC, which is stored in found_barrier. Since
2241 the ADDR_DIFF_VEC is accessed as data, just like our pool
2242 constants, this is a good opportunity to accommodate what
2243 we have gathered so far.
2244 If we waited any longer, we could end up at a barrier in
2245 front of code, which gives worse cache usage for separated
2246 instruction / data caches. */
2247 good_barrier = found_barrier;
2248 break;
2249 }
2250 else
2251 {
2252 rtx body = PATTERN (from);
2253 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
2254 }
2255 }
2256 /* For the SH1, we generate alignments even after jumps-around-jumps. */
2257 else if (GET_CODE (from) == JUMP_INSN
2258 && ! TARGET_SH2
2259 && ! TARGET_SMALLCODE)
2260 new_align = 4;
2261
2262 if (found_si)
2263 {
2264 count_si += inc;
2265 if (new_align > si_align)
2266 {
2267 si_limit -= (count_si - 1) & (new_align - si_align);
2268 si_align = new_align;
2269 }
2270 count_si = (count_si + new_align - 1) & -new_align;
2271 }
2272 if (found_hi)
2273 {
2274 count_hi += inc;
2275 if (new_align > hi_align)
2276 {
2277 hi_limit -= (count_hi - 1) & (new_align - hi_align);
2278 hi_align = new_align;
2279 }
2280 count_hi = (count_hi + new_align - 1) & -new_align;
2281 }
2282 from = NEXT_INSN (from);
2283 }
2284
2285 if (num_mova)
2286 {
2287 if (leading_mova)
2288 {
2289 /* Try as we might, the leading mova is out of range. Change
2290 it into a load (which will become a pcload) and retry. */
2291 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
2292 INSN_CODE (mova) = -1;
2293 return find_barrier (0, 0, mova);
2294 }
2295 else
2296 {
2297 /* Insert the constant pool table before the mova instruction,
2298 to prevent the mova label reference from going out of range. */
2299 from = mova;
2300 good_barrier = found_barrier = barrier_before_mova;
2301 }
2302 }
2303
2304 if (found_barrier)
2305 {
2306 if (good_barrier && next_real_insn (found_barrier))
2307 found_barrier = good_barrier;
2308 }
2309 else
2310 {
2311 /* We didn't find a barrier in time to dump our stuff,
2312 so we'll make one. */
2313 rtx label = gen_label_rtx ();
2314
2315 /* If we exceeded the range, then we must back up over the last
2316 instruction we looked at. Otherwise, we just need to undo the
2317 NEXT_INSN at the end of the loop. */
2318 if (count_hi > hi_limit || count_si > si_limit)
2319 from = PREV_INSN (PREV_INSN (from));
2320 else
2321 from = PREV_INSN (from);
2322
2323 /* Walk back to be just before any jump or label.
2324 Putting it before a label reduces the number of times the branch
2325 around the constant pool table will be hit. Putting it before
2326 a jump makes it more likely that the bra delay slot will be
2327 filled. */
2328 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
2329 || GET_CODE (from) == CODE_LABEL)
2330 from = PREV_INSN (from);
2331
2332 from = emit_jump_insn_after (gen_jump (label), from);
2333 JUMP_LABEL (from) = label;
2334 LABEL_NUSES (label) = 1;
2335 found_barrier = emit_barrier_after (from);
2336 emit_label_after (label, found_barrier);
2337 }
2338
2339 return found_barrier;
2340 }
2341
2342 /* If the instruction INSN is implemented by a special function, and we can
2343 positively find the register that is used to call the sfunc, and this
2344 register is not used anywhere else in this instruction - except as the
2345 destination of a set, return this register; else, return 0. */
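/* A sketch of the kind of pattern this matches -- the register numbers
   here are made up purely for illustration:

     (parallel [(set (reg:SI 0) (...))
                (use (reg:SI 2))
                (clobber (reg:SI 17))])

   The SImode USE names the register through which the sfunc is called,
   so (reg:SI 2) would be returned -- provided r2 appears nowhere else
   in the pattern except, possibly, as the destination of a SET. */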
2346 rtx
2347 sfunc_uses_reg (insn)
2348 rtx insn;
2349 {
2350 int i;
2351 rtx pattern, part, reg_part, reg;
2352
2353 if (GET_CODE (insn) != INSN)
2354 return 0;
2355 pattern = PATTERN (insn);
2356 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
2357 return 0;
2358
2359 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
2360 {
2361 part = XVECEXP (pattern, 0, i);
2362 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
2363 reg_part = part;
2364 }
2365 if (! reg_part)
2366 return 0;
2367 reg = XEXP (reg_part, 0);
2368 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
2369 {
2370 part = XVECEXP (pattern, 0, i);
2371 if (part == reg_part || GET_CODE (part) == CLOBBER)
2372 continue;
2373 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
2374 && GET_CODE (SET_DEST (part)) == REG)
2375 ? SET_SRC (part) : part)))
2376 return 0;
2377 }
2378 return reg;
2379 }
2380
2381 /* See if the only way in which INSN uses REG is by calling it, or by
2382 setting it while calling it. Set *SET to a SET rtx if the register
2383 is set by INSN. */
2384
2385 static int
2386 noncall_uses_reg (reg, insn, set)
2387 rtx reg;
2388 rtx insn;
2389 rtx *set;
2390 {
2391 rtx pattern, reg2;
2392
2393 *set = NULL_RTX;
2394
2395 reg2 = sfunc_uses_reg (insn);
2396 if (reg2 && REGNO (reg2) == REGNO (reg))
2397 {
2398 pattern = single_set (insn);
2399 if (pattern
2400 && GET_CODE (SET_DEST (pattern)) == REG
2401 && REGNO (reg) == REGNO (SET_DEST (pattern)))
2402 *set = pattern;
2403 return 0;
2404 }
2405 if (GET_CODE (insn) != CALL_INSN)
2406 {
2407 /* We don't use rtx_equal_p because we don't care if the mode is
2408 different. */
2409 pattern = single_set (insn);
2410 if (pattern
2411 && GET_CODE (SET_DEST (pattern)) == REG
2412 && REGNO (reg) == REGNO (SET_DEST (pattern)))
2413 {
2414 rtx par, part;
2415 int i;
2416
2417 *set = pattern;
2418 par = PATTERN (insn);
2419 if (GET_CODE (par) == PARALLEL)
2420 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
2421 {
2422 part = XVECEXP (par, 0, i);
2423 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
2424 return 1;
2425 }
2426 return reg_mentioned_p (reg, SET_SRC (pattern));
2427 }
2428
2429 return 1;
2430 }
2431
2432 pattern = PATTERN (insn);
2433
2434 if (GET_CODE (pattern) == PARALLEL)
2435 {
2436 int i;
2437
2438 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
2439 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
2440 return 1;
2441 pattern = XVECEXP (pattern, 0, 0);
2442 }
2443
2444 if (GET_CODE (pattern) == SET)
2445 {
2446 if (reg_mentioned_p (reg, SET_DEST (pattern)))
2447 {
2448 /* We don't use rtx_equal_p, because we don't care if the
2449 mode is different. */
2450 if (GET_CODE (SET_DEST (pattern)) != REG
2451 || REGNO (reg) != REGNO (SET_DEST (pattern)))
2452 return 1;
2453
2454 *set = pattern;
2455 }
2456
2457 pattern = SET_SRC (pattern);
2458 }
2459
2460 if (GET_CODE (pattern) != CALL
2461 || GET_CODE (XEXP (pattern, 0)) != MEM
2462 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
2463 return 1;
2464
2465 return 0;
2466 }
2467
2468 /* Given X, a pattern of an insn or a part of it, return a mask of used
2469 general registers. Bits 0..15 mean that the respective registers
2470 are used as inputs in the instruction. Bits 16..31 mean that the
2471 registers 0..15, respectively, are used as outputs, or are clobbered.
2472 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
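/* Worked example (illustrative only): for
     (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3)))
   the result is 0x2000c -- bit 17 because r1 is written, bits 2 and 3
   because r2 and r3 are read. */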
2473 int
2474 regs_used (x, is_dest)
2475 rtx x; int is_dest;
2476 {
2477 enum rtx_code code;
2478 const char *fmt;
2479 int i, used = 0;
2480
2481 if (! x)
2482 return used;
2483 code = GET_CODE (x);
2484 switch (code)
2485 {
2486 case REG:
2487 if (REGNO (x) < 16)
2488 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
2489 << (REGNO (x) + is_dest));
2490 return 0;
2491 case SUBREG:
2492 {
2493 rtx y = SUBREG_REG (x);
2494
2495 if (GET_CODE (y) != REG)
2496 break;
2497 if (REGNO (y) < 16)
2498 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
2499 << (REGNO (y) + SUBREG_WORD (x) + is_dest));
2500 return 0;
2501 }
2502 case SET:
2503 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
2504 case RETURN:
2505 /* If there was a return value, it must have been indicated with USE. */
2506 return 0x00ffff00;
2507 case CLOBBER:
2508 is_dest = 1;
2509 break;
2510 case MEM:
2511 is_dest = 0;
2512 break;
2513 case CALL:
2514 used |= 0x00ff00f0;
2515 break;
2516 default:
2517 break;
2518 }
2519
2520 fmt = GET_RTX_FORMAT (code);
2521
2522 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2523 {
2524 if (fmt[i] == 'E')
2525 {
2526 register int j;
2527 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2528 used |= regs_used (XVECEXP (x, i, j), is_dest);
2529 }
2530 else if (fmt[i] == 'e')
2531 used |= regs_used (XEXP (x, i), is_dest);
2532 }
2533 return used;
2534 }
2535
2536 /* Create an instruction that prevents redirection of a conditional branch
2537 to the destination of the JUMP with address ADDR.
2538 If the branch needs to be implemented as an indirect jump, try to find
2539 a scratch register for it.
2540 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
2541 If any preceding insn that doesn't fit into a delay slot is good enough,
2542 pass 1. Pass 2 if a definite blocking insn is needed.
2543 -1 is used internally to avoid deep recursion.
2544 If a blocking instruction is made or recognized, return it. */
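
/* For orientation: later in this file, gen_far_branch and split_branches
   pass NEED_BLOCK == 2 when a definite blocking insn is required, the
   jump-threading checks pass 1, and the recursive call inside this
   function passes -1. */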
2545
2546 static rtx
2547 gen_block_redirect (jump, addr, need_block)
2548 rtx jump;
2549 int addr, need_block;
2550 {
2551 int dead = 0;
2552 rtx prev = prev_nonnote_insn (jump);
2553 rtx dest;
2554
2555 /* First, check if we already have an instruction that satisfies our need. */
2556 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
2557 {
2558 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2559 return prev;
2560 if (GET_CODE (PATTERN (prev)) == USE
2561 || GET_CODE (PATTERN (prev)) == CLOBBER
2562 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
2563 prev = jump;
2564 else if ((need_block &= ~1) < 0)
2565 return prev;
2566 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
2567 need_block = 0;
2568 }
2569 /* We can't use JUMP_LABEL here because it might be undefined
2570 when not optimizing. */
2571 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
2572 /* If the branch is out of range, try to find a scratch register for it. */
2573 if (optimize
2574 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + 4092U > 4092 + 4098))
2575 {
2576 rtx scan;
2577 /* Don't look for the stack pointer as a scratch register,
2578 it would cause trouble if an interrupt occurred. */
2579 unsigned try = 0x7fff, used;
2580 int jump_left = flag_expensive_optimizations + 1;
2581
2582 /* It is likely that the most recent eligible instruction is wanted for
2583 the delay slot. Therefore, find out which registers it uses, and
2584 try to avoid using them. */
2585
2586 for (scan = jump; (scan = PREV_INSN (scan)); )
2587 {
2588 enum rtx_code code;
2589
2590 if (INSN_DELETED_P (scan))
2591 continue;
2592 code = GET_CODE (scan);
2593 if (code == CODE_LABEL || code == JUMP_INSN)
2594 break;
2595 if (code == INSN
2596 && GET_CODE (PATTERN (scan)) != USE
2597 && GET_CODE (PATTERN (scan)) != CLOBBER
2598 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
2599 {
2600 try &= ~regs_used (PATTERN (scan), 0);
2601 break;
2602 }
2603 }
2604 for (used = dead = 0, scan = JUMP_LABEL (jump);
2605 (scan = NEXT_INSN (scan)); )
2606 {
2607 enum rtx_code code;
2608
2609 if (INSN_DELETED_P (scan))
2610 continue;
2611 code = GET_CODE (scan);
2612 if (GET_RTX_CLASS (code) == 'i')
2613 {
2614 used |= regs_used (PATTERN (scan), 0);
2615 if (code == CALL_INSN)
2616 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
2617 dead |= (used >> 16) & ~used;
2618 if (dead & try)
2619 {
2620 dead &= try;
2621 break;
2622 }
2623 if (code == JUMP_INSN)
2624 {
2625 if (jump_left-- && simplejump_p (scan))
2626 scan = JUMP_LABEL (scan);
2627 else
2628 break;
2629 }
2630 }
2631 }
2632 /* Mask out the stack pointer again, in case it was
2633 the only 'free' register we have found. */
2634 dead &= 0x7fff;
2635 }
2636 /* If the immediate destination is still in range, check for possible
2637 threading with a jump beyond the delay slot insn.
2638 Don't check if we are called recursively; the jump has been or will be
2639 checked in a different invocation then. */
2640
2641 else if (optimize && need_block >= 0)
2642 {
2643 rtx next = next_active_insn (next_active_insn (dest));
2644 if (next && GET_CODE (next) == JUMP_INSN
2645 && GET_CODE (PATTERN (next)) == SET
2646 && recog_memoized (next) == CODE_FOR_jump)
2647 {
2648 dest = JUMP_LABEL (next);
2649 if (dest
2650 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + 4092U
2651 > 4092 + 4098))
2652 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
2653 }
2654 }
2655
2656 if (dead)
2657 {
2658 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
2659
2660 /* It would be nice if we could convert the jump into an indirect
2661 jump / far branch right now, and thus expose all constituent
2662 instructions to further optimization. However, reorg uses
2663 simplejump_p to determine if there is an unconditional jump where
2664 it should try to schedule instructions from the target of the
2665 branch; simplejump_p fails for indirect jumps even if they have
2666 a JUMP_LABEL. */
2667 rtx insn = emit_insn_before (gen_indirect_jump_scratch
2668 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
2669 , jump);
2670 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
2671 return insn;
2672 }
2673 else if (need_block)
2674 /* We can't use JUMP_LABEL here because it might be undefined
2675 when not optimizing. */
2676 return emit_insn_before (gen_block_branch_redirect
2677 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
2678 , jump);
2679 return prev;
2680 }
2681
2682 #define CONDJUMP_MIN -252
2683 #define CONDJUMP_MAX 262
2684 struct far_branch
2685 {
2686 /* A label (to be placed) in front of the jump
2687 that jumps to our ultimate destination. */
2688 rtx near_label;
2689 /* Where we are going to insert it if we cannot move the jump any farther,
2690 or the jump itself if we have picked up an existing jump. */
2691 rtx insert_place;
2692 /* The ultimate destination. */
2693 rtx far_label;
2694 struct far_branch *prev;
2695 /* If the branch has already been created, its address;
2696 else the address of its first prospective user. */
2697 int address;
2698 };
2699
2700 static void gen_far_branch PARAMS ((struct far_branch *));
2701 enum mdep_reorg_phase_e mdep_reorg_phase;
2702 void
2703 gen_far_branch (bp)
2704 struct far_branch *bp;
2705 {
2706 rtx insn = bp->insert_place;
2707 rtx jump;
2708 rtx label = gen_label_rtx ();
2709
2710 emit_label_after (label, insn);
2711 if (bp->far_label)
2712 {
2713 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
2714 LABEL_NUSES (bp->far_label)++;
2715 }
2716 else
2717 jump = emit_jump_insn_after (gen_return (), insn);
2718 /* Emit a barrier so that reorg knows that any following instructions
2719 are not reachable via a fall-through path.
2720 But don't do this when not optimizing, since we wouldn't suppress the
2721 alignment for the barrier then, and could end up with out-of-range
2722 pc-relative loads. */
2723 if (optimize)
2724 emit_barrier_after (jump);
2725 emit_label_after (bp->near_label, insn);
2726 JUMP_LABEL (jump) = bp->far_label;
2727 if (! invert_jump (insn, label, 1))
2728 abort ();
2729 /* Prevent reorg from undoing our splits. */
2730 gen_block_redirect (jump, bp->address += 2, 2);
2731 }
2732
2733 /* Fix up ADDR_DIFF_VECs. */
2734 void
2735 fixup_addr_diff_vecs (first)
2736 rtx first;
2737 {
2738 rtx insn;
2739
2740 for (insn = first; insn; insn = NEXT_INSN (insn))
2741 {
2742 rtx vec_lab, pat, prev, prevpat, x, braf_label;
2743
2744 if (GET_CODE (insn) != JUMP_INSN
2745 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
2746 continue;
2747 pat = PATTERN (insn);
2748 vec_lab = XEXP (XEXP (pat, 0), 0);
2749
2750 /* Search the matching casesi_jump_2. */
2751 for (prev = vec_lab; ; prev = PREV_INSN (prev))
2752 {
2753 if (GET_CODE (prev) != JUMP_INSN)
2754 continue;
2755 prevpat = PATTERN (prev);
2756 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
2757 continue;
2758 x = XVECEXP (prevpat, 0, 1);
2759 if (GET_CODE (x) != USE)
2760 continue;
2761 x = XEXP (x, 0);
2762 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
2763 break;
2764 }
2765
2766 /* Emit the reference label of the braf where it belongs, right after
2767 the casesi_jump_2 (i.e. braf). */
2768 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
2769 emit_label_after (braf_label, prev);
2770
2771 /* Fix up the ADDR_DIFF_VEC to be relative
2772 to the reference address of the braf. */
2773 XEXP (XEXP (pat, 0), 0) = braf_label;
2774 }
2775 }
2776
2777 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
2778 a barrier. Return the base 2 logarithm of the desired alignment. */
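/* For example (illustrative): a result of 2 asks for 1 << 2 == 4-byte
   alignment -- see the "1 << barrier_align (from)" use in find_barrier --
   while a result of CACHE_LOG asks for cache-line alignment. */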
2779 int
2780 barrier_align (barrier_or_label)
2781 rtx barrier_or_label;
2782 {
2783 rtx next = next_real_insn (barrier_or_label), pat, prev;
2784 int slot, credit, jump_to_next;
2785
2786 if (! next)
2787 return 0;
2788
2789 pat = PATTERN (next);
2790
2791 if (GET_CODE (pat) == ADDR_DIFF_VEC)
2792 return 2;
2793
2794 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
2795 /* This is a barrier in front of a constant table. */
2796 return 0;
2797
2798 prev = prev_real_insn (barrier_or_label);
2799 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
2800 {
2801 pat = PATTERN (prev);
2802 /* If this is a very small table, we want to keep the alignment after
2803 the table to the minimum for proper code alignment. */
2804 return ((TARGET_SMALLCODE
2805 || (XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
2806 <= 1 << (CACHE_LOG - 2)))
2807 ? 1 : CACHE_LOG);
2808 }
2809
2810 if (TARGET_SMALLCODE)
2811 return 0;
2812
2813 if (! TARGET_SH2 || ! optimize)
2814 return CACHE_LOG;
2815
2816 /* When fixing up pcloads, a constant table might be inserted just before
2817 the basic block that ends with the barrier. Thus, we can't trust the
2818 instruction lengths before that. */
2819 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
2820 {
2821 /* Check if there is an immediately preceding branch to the insn beyond
2822 the barrier. We must weight the cost of discarding useful information
2823 from the current cache line when executing this branch and there is
2824 an alignment, against that of fetching unneeded insn in front of the
2825 branch target when there is no alignment. */
2826
2827 /* There are two delay_slot cases to consider. One is the simple case
2828 where the preceding branch is to the insn beyond the barrier (simple
2829 delay slot filling), and the other is where the preceding branch has
2830 a delay slot that is a duplicate of the insn after the barrier
2831 (fill_eager_delay_slots) and the branch is to the insn after the insn
2832 after the barrier. */
2833
2834 /* PREV is presumed to be the JUMP_INSN for the barrier under
2835 investigation. Skip to the insn before it. */
2836 prev = prev_real_insn (prev);
2837
2838 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
2839 credit >= 0 && prev && GET_CODE (prev) == INSN;
2840 prev = prev_real_insn (prev))
2841 {
2842 jump_to_next = 0;
2843 if (GET_CODE (PATTERN (prev)) == USE
2844 || GET_CODE (PATTERN (prev)) == CLOBBER)
2845 continue;
2846 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
2847 {
2848 prev = XVECEXP (PATTERN (prev), 0, 1);
2849 if (INSN_UID (prev) == INSN_UID (next))
2850 {
2851 /* Delay slot was filled with insn at jump target. */
2852 jump_to_next = 1;
2853 continue;
2854 }
2855 }
2856
2857 if (slot &&
2858 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
2859 slot = 0;
2860 credit -= get_attr_length (prev);
2861 }
2862 if (prev
2863 && GET_CODE (prev) == JUMP_INSN
2864 && JUMP_LABEL (prev)
2865 && (jump_to_next || next_real_insn (JUMP_LABEL (prev)) == next))
2866 {
2867 rtx pat = PATTERN (prev);
2868 if (GET_CODE (pat) == PARALLEL)
2869 pat = XVECEXP (pat, 0, 0);
2870 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
2871 return 0;
2872 }
2873 }
2874
2875 return CACHE_LOG;
2876 }
2877
2878 /* If we are inside a phony loop, almost any kind of label can turn up as the
2879 first one in the loop. Aligning a braf label causes incorrect switch
2880 destination addresses; we can detect braf labels because they are
2881 followed by a BARRIER.
2882 Applying loop alignment to small constant or switch tables is a waste
2883 of space, so we suppress this too. */
2884 int
2885 sh_loop_align (label)
2886 rtx label;
2887 {
2888 rtx next = label;
2889
2890 do
2891 next = next_nonnote_insn (next);
2892 while (next && GET_CODE (next) == CODE_LABEL);
2893
2894 if (! next
2895 || ! INSN_P (next)
2896 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
2897 || recog_memoized (next) == CODE_FOR_consttable_2)
2898 return 0;
2899 return 2;
2900 }
2901
2902 /* Exported to toplev.c.
2903
2904 Do a final pass over the function, just before delayed branch
2905 scheduling. */
2906
2907 void
2908 machine_dependent_reorg (first)
2909 rtx first;
2910 {
2911 rtx insn, mova;
2912 int num_mova;
2913 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
2914 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
2915
2916 /* We must split call insns before introducing `mova's. If we're
2917 optimizing, they'll have already been split. Otherwise, make
2918 sure we don't split them too late. */
2919 if (! optimize)
2920 split_all_insns (0);
2921
2922 /* If relaxing, generate pseudo-ops to associate function calls with
2923 the symbols they call. It does no harm to not generate these
2924 pseudo-ops. However, when we can generate them, it enables the
2925 linker to potentially relax the jsr to a bsr, and eliminate the
2926 register load and, possibly, the constant pool entry. */
2927
2928 mdep_reorg_phase = SH_INSERT_USES_LABELS;
2929 if (TARGET_RELAX)
2930 {
2931 /* Remove all REG_LABEL notes. We want to use them for our own
2932 purposes. This works because none of the remaining passes
2933 need to look at them.
2934
2935 ??? But it may break in the future. We should use a machine
2936 dependent REG_NOTE, or some other approach entirely. */
2937 for (insn = first; insn; insn = NEXT_INSN (insn))
2938 {
2939 if (INSN_P (insn))
2940 {
2941 rtx note;
2942
2943 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
2944 remove_note (insn, note);
2945 }
2946 }
2947
2948 for (insn = first; insn; insn = NEXT_INSN (insn))
2949 {
2950 rtx pattern, reg, link, set, scan, dies, label;
2951 int rescan = 0, foundinsn = 0;
2952
2953 if (GET_CODE (insn) == CALL_INSN)
2954 {
2955 pattern = PATTERN (insn);
2956
2957 if (GET_CODE (pattern) == PARALLEL)
2958 pattern = XVECEXP (pattern, 0, 0);
2959 if (GET_CODE (pattern) == SET)
2960 pattern = SET_SRC (pattern);
2961
2962 if (GET_CODE (pattern) != CALL
2963 || GET_CODE (XEXP (pattern, 0)) != MEM)
2964 continue;
2965
2966 reg = XEXP (XEXP (pattern, 0), 0);
2967 }
2968 else
2969 {
2970 reg = sfunc_uses_reg (insn);
2971 if (! reg)
2972 continue;
2973 }
2974
2975 if (GET_CODE (reg) != REG)
2976 continue;
2977
2978 /* This is a function call via REG. If the only uses of REG
2979 between the time that it is set and the time that it dies
2980 are in function calls, then we can associate all the
2981 function calls with the setting of REG. */
2982
2983 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
2984 {
2985 if (REG_NOTE_KIND (link) != 0)
2986 continue;
2987 set = single_set (XEXP (link, 0));
2988 if (set && rtx_equal_p (reg, SET_DEST (set)))
2989 {
2990 link = XEXP (link, 0);
2991 break;
2992 }
2993 }
2994
2995 if (! link)
2996 {
2997 /* ??? Sometimes global register allocation will have
2998 deleted the insn pointed to by LOG_LINKS. Try
2999 scanning backward to find where the register is set. */
3000 for (scan = PREV_INSN (insn);
3001 scan && GET_CODE (scan) != CODE_LABEL;
3002 scan = PREV_INSN (scan))
3003 {
3004 if (! INSN_P (scan))
3005 continue;
3006
3007 if (! reg_mentioned_p (reg, scan))
3008 continue;
3009
3010 if (noncall_uses_reg (reg, scan, &set))
3011 break;
3012
3013 if (set)
3014 {
3015 link = scan;
3016 break;
3017 }
3018 }
3019 }
3020
3021 if (! link)
3022 continue;
3023
3024 /* The register is set at LINK. */
3025
3026 /* We can only optimize the function call if the register is
3027 being set to a symbol. In theory, we could sometimes
3028 optimize calls to a constant location, but the assembler
3029 and linker do not support that at present. */
3030 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
3031 && GET_CODE (SET_SRC (set)) != LABEL_REF)
3032 continue;
3033
3034 /* Scan forward from LINK to the place where REG dies, and
3035 make sure that the only insns which use REG are
3036 themselves function calls. */
3037
3038 /* ??? This doesn't work for call targets that were allocated
3039 by reload, since there may not be a REG_DEAD note for the
3040 register. */
3041
3042 dies = NULL_RTX;
3043 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
3044 {
3045 rtx scanset;
3046
3047 /* Don't try to trace forward past a CODE_LABEL if we haven't
3048 seen INSN yet. Ordinarily, we will only find the setting insn
3049 in LOG_LINKS if it is in the same basic block. However,
3050 cross-jumping can insert code labels in between the load and
3051 the call, and can result in situations where a single call
3052 insn may have two targets depending on where we came from. */
3053
3054 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
3055 break;
3056
3057 if (! INSN_P (scan))
3058 continue;
3059
3060 /* Don't try to trace forward past a JUMP. To optimize
3061 safely, we would have to check that all the
3062 instructions at the jump destination did not use REG. */
3063
3064 if (GET_CODE (scan) == JUMP_INSN)
3065 break;
3066
3067 if (! reg_mentioned_p (reg, scan))
3068 continue;
3069
3070 if (noncall_uses_reg (reg, scan, &scanset))
3071 break;
3072
3073 if (scan == insn)
3074 foundinsn = 1;
3075
3076 if (scan != insn
3077 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
3078 {
3079 /* There is a function call to this register other
3080 than the one we are checking. If we optimize
3081 this call, we need to rescan again below. */
3082 rescan = 1;
3083 }
3084
3085 /* ??? We shouldn't have to worry about SCANSET here.
3086 We should just be able to check for a REG_DEAD note
3087 on a function call. However, the REG_DEAD notes are
3088 apparently not dependable around libcalls; c-torture
3089 execute/920501-2 is a test case. If SCANSET is set,
3090 then this insn sets the register, so it must have
3091 died earlier. Unfortunately, this will only handle
3092 the cases in which the register is, in fact, set in a
3093 later insn. */
3094
3095 /* ??? We shouldn't have to use FOUNDINSN here.
3096 However, the LOG_LINKS fields are apparently not
3097 entirely reliable around libcalls;
3098 newlib/libm/math/e_pow.c is a test case. Sometimes
3099 an insn will appear in LOG_LINKS even though it is
3100 not the most recent insn which sets the register. */
3101
3102 if (foundinsn
3103 && (scanset
3104 || find_reg_note (scan, REG_DEAD, reg)))
3105 {
3106 dies = scan;
3107 break;
3108 }
3109 }
3110
3111 if (! dies)
3112 {
3113 /* Either there was a branch, or some insn used REG
3114 other than as a function call address. */
3115 continue;
3116 }
3117
3118 /* Create a code label, and put it in a REG_LABEL note on
3119 the insn which sets the register, and on each call insn
3120 which uses the register. In final_prescan_insn we look
3121 for the REG_LABEL notes, and output the appropriate label
3122 or pseudo-op. */
3123
3124 label = gen_label_rtx ();
3125 REG_NOTES (link) = gen_rtx_EXPR_LIST (REG_LABEL, label,
3126 REG_NOTES (link));
3127 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, label,
3128 REG_NOTES (insn));
3129 if (rescan)
3130 {
3131 scan = link;
3132 do
3133 {
3134 rtx reg2;
3135
3136 scan = NEXT_INSN (scan);
3137 if (scan != insn
3138 && ((GET_CODE (scan) == CALL_INSN
3139 && reg_mentioned_p (reg, scan))
3140 || ((reg2 = sfunc_uses_reg (scan))
3141 && REGNO (reg2) == REGNO (reg))))
3142 REG_NOTES (scan)
3143 = gen_rtx_EXPR_LIST (REG_LABEL, label, REG_NOTES (scan));
3144 }
3145 while (scan != dies);
3146 }
3147 }
3148 }
3149
3150 if (TARGET_SH2)
3151 fixup_addr_diff_vecs (first);
3152
3153 if (optimize)
3154 {
3155 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
3156 shorten_branches (first);
3157 }
3158 /* Scan the function looking for move instructions which have to be
3159 changed to pc-relative loads and insert the literal tables. */
3160
3161 mdep_reorg_phase = SH_FIXUP_PCLOAD;
3162 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
3163 {
3164 if (mova_p (insn))
3165 {
3166 if (! num_mova++)
3167 mova = insn;
3168 }
3169 else if (GET_CODE (insn) == JUMP_INSN
3170 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
3171 && num_mova)
3172 {
3173 rtx scan;
3174 int total;
3175
3176 num_mova--;
3177
3178 /* Some code might have been inserted between the mova and
3179 its ADDR_DIFF_VEC. Check if the mova is still in range. */
3180 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
3181 total += get_attr_length (scan);
3182
3183 /* range of mova is 1020, add 4 because pc counts from address of
3184 second instruction after this one, subtract 2 in case pc is 2
3185 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
3186 cancels out with alignment effects of the mova itself. */
3187 if (total > 1022)
3188 {
3189 /* Change the mova into a load, and restart scanning
3190 there. broken_move will then return true for mova. */
3191 SET_SRC (PATTERN (mova))
3192 = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3193 INSN_CODE (mova) = -1;
3194 insn = mova;
3195 }
3196 }
3197 if (broken_move (insn))
3198 {
3199 rtx scan;
3200 /* Scan ahead looking for a barrier to stick the constant table
3201 behind. */
3202 rtx barrier = find_barrier (num_mova, mova, insn);
3203 rtx last_float_move, last_float = 0, *last_float_addr;
3204
3205 if (num_mova && ! mova_p (mova))
3206 {
3207 /* find_barrier had to change the first mova into a
3208 pcload; thus, we have to start with this new pcload. */
3209 insn = mova;
3210 num_mova = 0;
3211 }
3212 /* Now find all the moves between the points and modify them. */
3213 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
3214 {
3215 if (GET_CODE (scan) == CODE_LABEL)
3216 last_float = 0;
3217 if (broken_move (scan))
3218 {
3219 rtx *patp = &PATTERN (scan), pat = *patp;
3220 rtx src, dst;
3221 rtx lab;
3222 rtx newsrc;
3223 enum machine_mode mode;
3224
3225 if (GET_CODE (pat) == PARALLEL)
3226 patp = &XVECEXP (pat, 0, 0), pat = *patp;
3227 src = SET_SRC (pat);
3228 dst = SET_DEST (pat);
3229 mode = GET_MODE (dst);
3230
3231 if (mode == SImode && hi_const (src)
3232 && REGNO (dst) != FPUL_REG)
3233 {
3234 int offset = 0;
3235
3236 mode = HImode;
3237 while (GET_CODE (dst) == SUBREG)
3238 {
3239 offset += SUBREG_WORD (dst);
3240 dst = SUBREG_REG (dst);
3241 }
3242 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
3243 }
3244
3245 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3246 {
3247 /* This must be an insn that clobbers r0. */
3248 rtx clobber = XVECEXP (PATTERN (scan), 0,
3249 XVECLEN (PATTERN (scan), 0) - 1);
3250
3251 if (GET_CODE (clobber) != CLOBBER
3252 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
3253 abort ();
3254
3255 if (last_float
3256 && reg_set_between_p (r0_rtx, last_float_move, scan))
3257 last_float = 0;
3258 lab = add_constant (src, mode, last_float);
3259 if (lab)
3260 emit_insn_before (gen_mova (lab), scan);
3261 else
3262 {
3263 /* There will be a REG_UNUSED note for r0 on
3264 LAST_FLOAT_MOVE; we have to change it to REG_INC;
3265 otherwise reorg:mark_target_live_regs will not
3266 consider r0 to be used, and we could end up with a
3267 delay slot insn in front of SCAN that clobbers r0. */
3268 rtx note
3269 = find_regno_note (last_float_move, REG_UNUSED, 0);
3270
3271 /* If we are not optimizing, then there may not be
3272 a note. */
3273 if (note)
3274 PUT_MODE (note, REG_INC);
3275
3276 *last_float_addr = r0_inc_rtx;
3277 }
3278 last_float_move = scan;
3279 last_float = src;
3280 newsrc = gen_rtx (MEM, mode,
3281 (((TARGET_SH4 && ! TARGET_FMOVD)
3282 || REGNO (dst) == FPUL_REG)
3283 ? r0_inc_rtx
3284 : r0_rtx));
3285 last_float_addr = &XEXP (newsrc, 0);
3286
3287 /* Remove the clobber of r0. */
3288 XEXP (clobber, 0) = gen_rtx_SCRATCH (Pmode);
3289 }
3290 /* This is a mova needing a label. Create it. */
3291 else if (GET_CODE (src) == UNSPEC
3292 && XINT (src, 1) == UNSPEC_MOVA
3293 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
3294 {
3295 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
3296 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
3297 newsrc = gen_rtx_UNSPEC (VOIDmode,
3298 gen_rtvec (1, newsrc),
3299 UNSPEC_MOVA);
3300 }
3301 else
3302 {
3303 lab = add_constant (src, mode, 0);
3304 newsrc = gen_rtx_MEM (mode,
3305 gen_rtx_LABEL_REF (VOIDmode, lab));
3306 }
3307 RTX_UNCHANGING_P (newsrc) = 1;
3308 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
3309 INSN_CODE (scan) = -1;
3310 }
3311 }
3312 dump_table (barrier);
3313 insn = barrier;
3314 }
3315 }
3316
3317 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
3318 INSN_ADDRESSES_FREE ();
3319 split_branches (first);
3320
3321 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
3322 also has an effect on the register that holds the address of the sfunc.
3323 Insert an extra dummy insn in front of each sfunc that pretends to
3324 use this register. */
3325 if (flag_delayed_branch)
3326 {
3327 for (insn = first; insn; insn = NEXT_INSN (insn))
3328 {
3329 rtx reg = sfunc_uses_reg (insn);
3330
3331 if (! reg)
3332 continue;
3333 emit_insn_before (gen_use_sfunc_addr (reg), insn);
3334 }
3335 }
3336 #if 0
3337 /* fpscr is not actually a user variable, but we pretend it is for the
3338 sake of the previous optimization passes, since we want it handled like
3339 one. However, we don't have any debugging information for it, so turn
3340 it into a non-user variable now. */
3341 if (TARGET_SH4)
3342 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
3343 #endif
3344 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
3345 }
3346
3347 int
3348 get_dest_uid (label, max_uid)
3349 rtx label;
3350 int max_uid;
3351 {
3352 rtx dest = next_real_insn (label);
3353 int dest_uid;
3354 if (! dest)
3355 /* This can happen for an undefined label. */
3356 return 0;
3357 dest_uid = INSN_UID (dest);
3358 /* If this is a newly created branch redirection blocking instruction,
3359 we cannot index the branch_uid or insn_addresses arrays with its
3360 uid. But then, we won't need to, because the actual destination is
3361 the following branch. */
3362 while (dest_uid >= max_uid)
3363 {
3364 dest = NEXT_INSN (dest);
3365 dest_uid = INSN_UID (dest);
3366 }
3367 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
3368 return 0;
3369 return dest_uid;
3370 }
3371
3372 /* Split condbranches that are out of range. Also add clobbers for
3373 scratch registers that are needed in far jumps.
3374 We do this before delay slot scheduling, so that it can take our
3375 newly created instructions into account. It also allows us to
3376 find branches with common targets more easily. */
3377
3378 static void
3379 split_branches (first)
3380 rtx first;
3381 {
3382 rtx insn;
3383 struct far_branch **uid_branch, *far_branch_list = 0;
3384 int max_uid = get_max_uid ();
3385
3386 /* Find out which branches are out of range. */
3387 shorten_branches (first);
3388
3389 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
3390 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
3391
3392 for (insn = first; insn; insn = NEXT_INSN (insn))
3393 if (! INSN_P (insn))
3394 continue;
3395 else if (INSN_DELETED_P (insn))
3396 {
3397 /* Shorten_branches would split this instruction again,
3398 so transform it into a note. */
3399 PUT_CODE (insn, NOTE);
3400 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3401 NOTE_SOURCE_FILE (insn) = 0;
3402 }
3403 else if (GET_CODE (insn) == JUMP_INSN
3404 /* Don't mess with ADDR_DIFF_VEC */
3405 && (GET_CODE (PATTERN (insn)) == SET
3406 || GET_CODE (PATTERN (insn)) == RETURN))
3407 {
3408 enum attr_type type = get_attr_type (insn);
3409 if (type == TYPE_CBRANCH)
3410 {
3411 rtx next, beyond;
3412
3413 if (get_attr_length (insn) > 4)
3414 {
3415 rtx src = SET_SRC (PATTERN (insn));
3416 rtx olabel = XEXP (XEXP (src, 1), 0);
3417 int addr = INSN_ADDRESSES (INSN_UID (insn));
3418 rtx label = 0;
3419 int dest_uid = get_dest_uid (olabel, max_uid);
3420 struct far_branch *bp = uid_branch[dest_uid];
3421
3422 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
3423 the label if the LABEL_NUSES count drops to zero. There is
3424 always a jump_optimize pass that sets these values, but it
3425 proceeds to delete unreferenced code, and then if not
3426 optimizing, to un-delete the deleted instructions, thus
3427 leaving labels with use counts that are too low. */
3428 if (! optimize)
3429 {
3430 JUMP_LABEL (insn) = olabel;
3431 LABEL_NUSES (olabel)++;
3432 }
3433 if (! bp)
3434 {
3435 bp = (struct far_branch *) alloca (sizeof *bp);
3436 uid_branch[dest_uid] = bp;
3437 bp->prev = far_branch_list;
3438 far_branch_list = bp;
3439 bp->far_label
3440 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
3441 LABEL_NUSES (bp->far_label)++;
3442 }
3443 else
3444 {
3445 label = bp->near_label;
3446 if (! label && bp->address - addr >= CONDJUMP_MIN)
3447 {
3448 rtx block = bp->insert_place;
3449
3450 if (GET_CODE (PATTERN (block)) == RETURN)
3451 block = PREV_INSN (block);
3452 else
3453 block = gen_block_redirect (block,
3454 bp->address, 2);
3455 label = emit_label_after (gen_label_rtx (),
3456 PREV_INSN (block));
3457 bp->near_label = label;
3458 }
3459 else if (label && ! NEXT_INSN (label))
3460 {
3461 if (addr + 2 - bp->address <= CONDJUMP_MAX)
3462 bp->insert_place = insn;
3463 else
3464 gen_far_branch (bp);
3465 }
3466 }
3467 if (! label
3468 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
3469 {
3470 bp->near_label = label = gen_label_rtx ();
3471 bp->insert_place = insn;
3472 bp->address = addr;
3473 }
3474 if (! redirect_jump (insn, label, 1))
3475 abort ();
3476 }
3477 else
3478 {
3479 /* get_attr_length (insn) == 2 */
3480 /* Check if we have a pattern where reorg wants to redirect
3481 the branch to a label from an unconditional branch that
3482 is too far away. */
3483 /* We can't use JUMP_LABEL here because it might be undefined
3484 when not optimizing. */
3485 /* A syntax error might cause beyond to be NULL_RTX. */
3486 beyond
3487 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
3488 0));
3489
3490 if (beyond
3491 && (GET_CODE (beyond) == JUMP_INSN
3492 || ((beyond = next_active_insn (beyond))
3493 && GET_CODE (beyond) == JUMP_INSN))
3494 && GET_CODE (PATTERN (beyond)) == SET
3495 && recog_memoized (beyond) == CODE_FOR_jump
3496 && ((INSN_ADDRESSES
3497 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
3498 - INSN_ADDRESSES (INSN_UID (insn)) + 252U)
3499 > 252 + 258 + 2))
3500 gen_block_redirect (beyond,
3501 INSN_ADDRESSES (INSN_UID (beyond)), 1);
3502 }
3503
3504 next = next_active_insn (insn);
3505
3506 if ((GET_CODE (next) == JUMP_INSN
3507 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
3508 && GET_CODE (PATTERN (next)) == SET
3509 && recog_memoized (next) == CODE_FOR_jump
3510 && ((INSN_ADDRESSES
3511 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
3512 - INSN_ADDRESSES (INSN_UID (insn)) + 252U)
3513 > 252 + 258 + 2))
3514 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
3515 }
3516 else if (type == TYPE_JUMP || type == TYPE_RETURN)
3517 {
3518 int addr = INSN_ADDRESSES (INSN_UID (insn));
3519 rtx far_label = 0;
3520 int dest_uid = 0;
3521 struct far_branch *bp;
3522
3523 if (type == TYPE_JUMP)
3524 {
3525 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
3526 dest_uid = get_dest_uid (far_label, max_uid);
3527 if (! dest_uid)
3528 {
3529 /* Parse errors can lead to labels outside
3530 the insn stream. */
3531 if (! NEXT_INSN (far_label))
3532 continue;
3533
3534 if (! optimize)
3535 {
3536 JUMP_LABEL (insn) = far_label;
3537 LABEL_NUSES (far_label)++;
3538 }
3539 redirect_jump (insn, NULL_RTX, 1);
3540 far_label = 0;
3541 }
3542 }
3543 bp = uid_branch[dest_uid];
3544 if (! bp)
3545 {
3546 bp = (struct far_branch *) alloca (sizeof *bp);
3547 uid_branch[dest_uid] = bp;
3548 bp->prev = far_branch_list;
3549 far_branch_list = bp;
3550 bp->near_label = 0;
3551 bp->far_label = far_label;
3552 if (far_label)
3553 LABEL_NUSES (far_label)++;
3554 }
3555 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
3556 if (addr - bp->address <= CONDJUMP_MAX)
3557 emit_label_after (bp->near_label, PREV_INSN (insn));
3558 else
3559 {
3560 gen_far_branch (bp);
3561 bp->near_label = 0;
3562 }
3563 else
3564 bp->near_label = 0;
3565 bp->address = addr;
3566 bp->insert_place = insn;
3567 if (! far_label)
3568 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
3569 else
3570 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
3571 }
3572 }
3573 /* Generate all pending far branches,
3574 and free our references to the far labels. */
3575 while (far_branch_list)
3576 {
3577 if (far_branch_list->near_label
3578 && ! NEXT_INSN (far_branch_list->near_label))
3579 gen_far_branch (far_branch_list);
3580 if (optimize
3581 && far_branch_list->far_label
3582 && ! --LABEL_NUSES (far_branch_list->far_label))
3583 delete_insn (far_branch_list->far_label);
3584 far_branch_list = far_branch_list->prev;
3585 }
3586
3587 /* Instruction length information is no longer valid due to the new
3588 instructions that have been generated. */
3589 init_insn_lengths ();
3590 }
3591
3592 /* Dump out instruction addresses, which is useful for debugging the
3593 constant pool table stuff.
3594
3595 If relaxing, output the label and pseudo-ops used to link together
3596 calls and the instruction which set the registers. */
3597
3598 /* ??? This is unnecessary, and probably should be deleted. This makes
3599 the insn_addresses declaration above unnecessary. */
3600
3601 /* ??? The addresses printed by this routine for insns are nonsense for
3602 insns which are inside of a sequence where none of the inner insns have
3603 variable length. This is because the second pass of shorten_branches
3604 does not bother to update them. */
3605
3606 void
3607 final_prescan_insn (insn, opvec, noperands)
3608 rtx insn;
3609 rtx *opvec ATTRIBUTE_UNUSED;
3610 int noperands ATTRIBUTE_UNUSED;
3611 {
3612 if (TARGET_DUMPISIZE)
3613 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
3614
3615 if (TARGET_RELAX)
3616 {
3617 rtx note;
3618
3619 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
3620 if (note)
3621 {
3622 rtx pattern;
3623
3624 pattern = PATTERN (insn);
3625 if (GET_CODE (pattern) == PARALLEL)
3626 pattern = XVECEXP (pattern, 0, 0);
3627 if (GET_CODE (pattern) == CALL
3628 || (GET_CODE (pattern) == SET
3629 && (GET_CODE (SET_SRC (pattern)) == CALL
3630 || get_attr_type (insn) == TYPE_SFUNC)))
3631 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
3632 CODE_LABEL_NUMBER (XEXP (note, 0)));
3633 else if (GET_CODE (pattern) == SET)
3634 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3635 CODE_LABEL_NUMBER (XEXP (note, 0)));
3636 else
3637 abort ();
3638 }
3639 }
3640 }
3641
3642 /* Dump out any constants accumulated in the final pass. These will
3643 only be labels. */
3644
3645 const char *
3646 output_jump_label_table ()
3647 {
3648 int i;
3649
3650 if (pool_size)
3651 {
3652 fprintf (asm_out_file, "\t.align 2\n");
3653 for (i = 0; i < pool_size; i++)
3654 {
3655 pool_node *p = &pool_vector[i];
3656
3657 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3658 CODE_LABEL_NUMBER (p->label));
3659 output_asm_insn (".long %O0", &p->value);
3660 }
3661 pool_size = 0;
3662 }
3663
3664 return "";
3665 }
3666 \f
3667 /* A full frame looks like:
3668
3669 arg-5
3670 arg-4
3671 [ if current_function_anonymous_args
3672 arg-3
3673 arg-2
3674 arg-1
3675 arg-0 ]
3676 saved-fp
3677 saved-r10
3678 saved-r11
3679 saved-r12
3680 saved-pr
3681 local-n
3682 ..
3683 local-1
3684 local-0 <- fp points here. */
3685
3686 /* Number of bytes pushed for anonymous args, used to pass information
3687 between expand_prologue and expand_epilogue. */
3688
3689 static int extra_push;
3690
3691 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register
3692 to be adjusted, and TEMP, if nonnegative, holds the register number
3693 of a general register that we may clobber. */
3694
3695 static void
3696 output_stack_adjust (size, reg, temp)
3697 int size;
3698 rtx reg;
3699 int temp;
3700 {
3701 if (size)
3702 {
3703 if (CONST_OK_FOR_I (size))
3704 emit_insn (gen_addsi3 (reg, reg, GEN_INT (size)));
3705 /* Try to do it with two partial adjustments; however, we must make
3706 sure that the stack is properly aligned at all times, in case
3707 an interrupt occurs between the two partial adjustments. */
3708 else if (CONST_OK_FOR_I (size / 2 & -4)
3709 && CONST_OK_FOR_I (size - (size / 2 & -4)))
3710 {
3711 emit_insn (gen_addsi3 (reg, reg, GEN_INT (size / 2 & -4)));
3712 emit_insn (gen_addsi3 (reg, reg, GEN_INT (size - (size / 2 & -4))));
3713 }
3714 else
3715 {
3716 rtx const_reg;
3717
3718 /* If TEMP is invalid, we could temporarily save a general
3719 register to MACL. However, there is currently no need
3720 to handle this case, so just abort when we see it. */
3721 if (temp < 0)
3722 abort ();
3723 const_reg = gen_rtx_REG (SImode, temp);
3724
3725 /* If SIZE is negative, subtract the positive value.
3726 This sometimes allows a constant pool entry to be shared
3727 between prologue and epilogue code. */
3728 if (size < 0)
3729 {
3730 emit_insn (gen_movsi (const_reg, GEN_INT (-size)));
3731 emit_insn (gen_subsi3 (reg, reg, const_reg));
3732 }
3733 else
3734 {
3735 emit_insn (gen_movsi (const_reg, GEN_INT (size)));
3736 emit_insn (gen_addsi3 (reg, reg, const_reg));
3737 }
3738 }
3739 }
3740 }
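
/* Usage sketch (sizes chosen for illustration; this assumes CONST_OK_FOR_I
   accepts the signed 8-bit range -128..127):

     output_stack_adjust (-64, stack_pointer_rtx, 3);
       -> a single add of -64.
     output_stack_adjust (-200, stack_pointer_rtx, 3);
       -> two adds of -100 each, so sp stays 4-byte aligned in between.
     output_stack_adjust (-1000, stack_pointer_rtx, 3);
       -> loads 1000 into r3 and emits a subtract. */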
3741
3742 /* Output RTL to push register RN onto the stack. */
3743
3744 static void
3745 push (rn)
3746 int rn;
3747 {
3748 rtx x;
3749 if (rn == FPUL_REG)
3750 x = gen_push_fpul ();
3751 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
3752 && FP_OR_XD_REGISTER_P (rn))
3753 {
3754 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
3755 return;
3756 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
3757 }
3758 else if (TARGET_SH3E && FP_REGISTER_P (rn))
3759 x = gen_push_e (gen_rtx_REG (SFmode, rn));
3760 else
3761 x = gen_push (gen_rtx_REG (SImode, rn));
3762
3763 x = emit_insn (x);
3764 REG_NOTES (x)
3765 = gen_rtx_EXPR_LIST (REG_INC,
3766 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
3767 }
3768
3769 /* Output RTL to pop register RN from the stack. */
3770
3771 static void
3772 pop (rn)
3773 int rn;
3774 {
3775 rtx x;
3776 if (rn == FPUL_REG)
3777 x = gen_pop_fpul ();
3778 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
3779 && FP_OR_XD_REGISTER_P (rn))
3780 {
3781 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
3782 return;
3783 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
3784 }
3785 else if (TARGET_SH3E && FP_REGISTER_P (rn))
3786 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
3787 else
3788 x = gen_pop (gen_rtx_REG (SImode, rn));
3789
3790 x = emit_insn (x);
3791 REG_NOTES (x)
3792 = gen_rtx_EXPR_LIST (REG_INC,
3793 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
3794 }
3795
3796 /* Generate code to push the regs specified in the mask. */
3797
3798 static void
3799 push_regs (mask, mask2)
3800 int mask, mask2;
3801 {
3802 int i;
3803
3804 /* Push PR last; this gives better latencies after the prologue, and
3805 candidates for the return delay slot when there are no general
3806 registers pushed. */
3807 for (i = 0; i < 32; i++)
3808 if (mask & (1 << i) && i != PR_REG)
3809 push (i);
3810 for (i = 32; i < FIRST_PSEUDO_REGISTER; i++)
3811 if (mask2 & (1 << (i - 32)))
3812 push (i);
3813 if (mask & (1 << PR_REG))
3814 push (PR_REG);
3815 }
3816
3817 /* Work out the registers which need to be saved, both as a mask and a
3818 count of saved words.
3819
3820 If doing a pragma interrupt function, then push all regs used by the
3821 function, and if we call another function (we can tell by looking at PR),
3822 make sure that all the regs it clobbers are safe too. */
3823
3824 static int
3825 calc_live_regs (count_ptr, live_regs_mask2)
3826 int *count_ptr;
3827 int *live_regs_mask2;
3828 {
3829 int reg;
3830 int live_regs_mask = 0;
3831 int count;
3832 int interrupt_handler;
3833
3834 if ((lookup_attribute
3835 ("interrupt_handler",
3836 DECL_MACHINE_ATTRIBUTES (current_function_decl)))
3837 != NULL_TREE)
3838 interrupt_handler = 1;
3839 else
3840 interrupt_handler = 0;
3841
3842 *live_regs_mask2 = 0;
3843 /* If switching to double mode would save a lot of register saves, do that. */
3844 if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
3845 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
3846 if (regs_ever_live[reg] && regs_ever_live[reg+1]
3847 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
3848 && ++count > 2)
3849 {
3850 target_flags &= ~FPU_SINGLE_BIT;
3851 break;
3852 }
3853 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
3854 {
3855 if ((interrupt_handler && ! pragma_trapa)
3856 ? (/* Need to save all the regs ever live. */
3857 (regs_ever_live[reg]
3858 || (call_used_regs[reg]
3859 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
3860 && regs_ever_live[PR_REG]))
3861 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
3862 && reg != RETURN_ADDRESS_POINTER_REGNUM
3863 && reg != T_REG && reg != GBR_REG && reg != FPSCR_REG)
3864 : (/* Only push those regs which are used and need to be saved. */
3865 regs_ever_live[reg] && ! call_used_regs[reg]))
3866 {
3867 if (reg >= 32)
3868 *live_regs_mask2 |= 1 << (reg - 32);
3869 else
3870 live_regs_mask |= 1 << reg;
3871 count++;
3872 if (TARGET_SH4 && TARGET_FMOVD && FP_OR_XD_REGISTER_P (reg))
3873 {
3874 if (FP_REGISTER_P (reg))
3875 {
3876 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
3877 {
3878 if (reg >= 32)
3879 *live_regs_mask2 |= 1 << ((reg ^ 1) - 32);
3880 else
3881 live_regs_mask |= 1 << (reg ^ 1);
3882 count++;
3883 }
3884 }
3885 else /* if (XD_REGISTER_P (reg)) */
3886 {
3887 /* Must switch to double mode to access these registers. */
3888 target_flags &= ~FPU_SINGLE_BIT;
3889 count++;
3890 }
3891 }
3892 }
3893 }
3894
3895 *count_ptr = count * UNITS_PER_WORD;
3896 return live_regs_mask;
3897 }
3898
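/* Layout of the masks computed above, for reference: bit N of the
   returned mask covers hard reg N for regs 0..31, and bit N of
   *LIVE_REGS_MASK2 covers hard reg 32 + N:

     saved = reg < 32
             ? live_regs_mask & (1 << reg)
             : *live_regs_mask2 & (1 << (reg - 32));  */
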
3899 /* Code to generate prologue and epilogue sequences */
3900
3901 /* PUSHED is the number of bytes that are being pushed on the
3902 stack for register saves. Return the frame size, padded
3903 appropriately so that the stack stays properly aligned. */
3904 static HOST_WIDE_INT
3905 rounded_frame_size (pushed)
3906 int pushed;
3907 {
3908 HOST_WIDE_INT size = get_frame_size ();
3909 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
3910
3911 return ((size + pushed + align - 1) & -align) - pushed;
3912 }
3913
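/* Worked example (assuming a 32-bit STACK_BOUNDARY, i.e. ALIGN == 4):
   with get_frame_size () == 10 and PUSHED == 20, we return
   ((10 + 20 + 3) & -4) - 20 == 12, so register saves plus frame
   (20 + 12 == 32 bytes) keep the stack aligned.  */
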
3914 void
3915 sh_expand_prologue ()
3916 {
3917 int live_regs_mask;
3918 int d, i;
3919 int live_regs_mask2;
3920 int save_flags = target_flags;
3921
3922 /* We have pretend args if we had an object sent partially in registers
3923 and partially on the stack, e.g. a large structure. */
3924 output_stack_adjust (-current_function_pretend_args_size,
3925 stack_pointer_rtx, 3);
3926
3927 extra_push = 0;
3928
3929 /* This is set by SETUP_INCOMING_VARARGS to indicate that this is a varargs
3930 routine. Clear it here so that the next function isn't affected. */
3931 if (current_function_anonymous_args)
3932 {
3933 current_function_anonymous_args = 0;
3934
3935 /* This is not used by the SH3E calling convention */
3936 if (! TARGET_SH3E && ! TARGET_HITACHI)
3937 {
3938 /* Push arg regs as if they'd been provided by caller in stack. */
3939 for (i = 0; i < NPARM_REGS(SImode); i++)
3940 {
3941 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
3942 if (i >= (NPARM_REGS(SImode)
3943 - current_function_args_info.arg_count[(int) SH_ARG_INT]
3944 ))
3945 break;
3946 push (rn);
3947 extra_push += 4;
3948 }
3949 }
3950 }
3951
3952 /* If we're supposed to switch stacks at function entry, do so now. */
3953 if (sp_switch)
3954 emit_insn (gen_sp_switch_1 ());
3955
3956 live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
3957 /* ??? Maybe we could save some switching if we can move a mode switch
3958 that already happens to be at the function start into the prologue. */
3959 if (target_flags != save_flags)
3960 emit_insn (gen_toggle_sz ());
3961
3962 push_regs (live_regs_mask, live_regs_mask2);
3963
3964 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
3965 emit_insn (gen_GOTaddr2picreg ());
3966
3967 if (target_flags != save_flags)
3968 emit_insn (gen_toggle_sz ());
3969
3970 target_flags = save_flags;
3971
3972 output_stack_adjust (-rounded_frame_size (d),
3973 stack_pointer_rtx, 3);
3974
3975 if (frame_pointer_needed)
3976 emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx));
3977 }
3978
3979 void
3980 sh_expand_epilogue ()
3981 {
3982 int live_regs_mask;
3983 int d, i;
3984
3985 int live_regs_mask2;
3986 int save_flags = target_flags;
3987 int frame_size;
3988
3989 live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
3990
3991 frame_size = rounded_frame_size (d);
3992
3993 if (frame_pointer_needed)
3994 {
3995 output_stack_adjust (frame_size, frame_pointer_rtx, 7);
3996
3997 /* We must avoid moving the stack pointer adjustment past code
3998 which reads from the local frame, else an interrupt could
3999 occur after the SP adjustment and clobber data in the local
4000 frame. */
4001 emit_insn (gen_blockage ());
4002 emit_insn (gen_movsi (stack_pointer_rtx, frame_pointer_rtx));
4003 }
4004 else if (frame_size)
4005 {
4006 /* We must avoid moving the stack pointer adjustment past code
4007 which reads from the local frame, else an interrupt could
4008 occur after the SP adjustment and clobber data in the local
4009 frame. */
4010 emit_insn (gen_blockage ());
4011 output_stack_adjust (frame_size, stack_pointer_rtx, 7);
4012 }
4013
4014 /* Pop all the registers. */
4015
4016 if (target_flags != save_flags)
4017 emit_insn (gen_toggle_sz ());
4018 if (live_regs_mask & (1 << PR_REG))
4019 pop (PR_REG);
4020 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4021 {
4022 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
4023 if (j < 32 && (live_regs_mask & (1 << j)) && j != PR_REG)
4024 pop (j);
4025 else if (j >= 32 && (live_regs_mask2 & (1 << (j - 32))))
4026 pop (j);
4027 }
4028 if (target_flags != save_flags)
4029 emit_insn (gen_toggle_sz ());
4030 target_flags = save_flags;
4031
4032 output_stack_adjust (extra_push + current_function_pretend_args_size,
4033 stack_pointer_rtx, 7);
4034
4035 /* Switch back to the normal stack if necessary. */
4036 if (sp_switch)
4037 emit_insn (gen_sp_switch_2 ());
4038 }
4039
4040 static int sh_need_epilogue_known = 0;
4041
4042 int
4043 sh_need_epilogue ()
4044 {
4045 if (! sh_need_epilogue_known)
4046 {
4047 rtx epilogue;
4048
4049 start_sequence ();
4050 sh_expand_epilogue ();
4051 epilogue = gen_sequence ();
4052 end_sequence ();
4053 sh_need_epilogue_known
4054 = (GET_CODE (epilogue) == SEQUENCE && XVECLEN (epilogue, 0) == 0
4055 ? -1 : 1);
4056 }
4057 return sh_need_epilogue_known > 0;
4058 }
4059
4060 /* Clear variables at function end. */
4061
4062 void
4063 function_epilogue (stream, size)
4064 FILE *stream ATTRIBUTE_UNUSED;
4065 int size ATTRIBUTE_UNUSED;
4066 {
4067 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
4068 sh_need_epilogue_known = 0;
4069 sp_switch = NULL_RTX;
4070 }
4071
4072 rtx
4073 sh_builtin_saveregs ()
4074 {
4075 /* First unnamed integer register. */
4076 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
4077 /* Number of integer registers we need to save. */
4078 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
4079 /* First unnamed SFmode float reg */
4080 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
4081 /* Number of SFmode float regs to save. */
4082 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
4083 rtx regbuf, fpregs;
4084 int bufsize, regno, alias_set;
4085
4086 /* Allocate block of memory for the regs. */
4087 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
4088 Or can assign_stack_local accept a 0 SIZE argument? */
4089 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
4090
4091 regbuf = assign_stack_local (BLKmode, bufsize, 0);
4092 alias_set = get_varargs_alias_set ();
4093 MEM_ALIAS_SET (regbuf) = alias_set;
4094
4095 /* Save int args.
4096 This is optimized to only save the regs that are necessary. Explicitly
4097 named args need not be saved. */
4098 if (n_intregs > 0)
4099 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
4100 change_address (regbuf, BLKmode,
4101 plus_constant (XEXP (regbuf, 0),
4102 (n_floatregs
4103 * UNITS_PER_WORD))),
4104 n_intregs, n_intregs * UNITS_PER_WORD);
4105
4106 /* Save float args.
4107 This is optimized to only save the regs that are necessary. Explicitly
4108 named args need not be saved.
4109 We explicitly build a pointer to the buffer because it halves the insn
4110 count when not optimizing (otherwise the pointer is built for each reg
4111 saved).
4112 We emit the moves in reverse order so that we can use predecrement. */
4113
4114 fpregs = gen_reg_rtx (Pmode);
4115 emit_move_insn (fpregs, XEXP (regbuf, 0));
4116 emit_insn (gen_addsi3 (fpregs, fpregs,
4117 GEN_INT (n_floatregs * UNITS_PER_WORD)));
4118 if (TARGET_SH4)
4119 {
4120 rtx mem;
4121 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
4122 {
4123 emit_insn (gen_addsi3 (fpregs, fpregs,
4124 GEN_INT (-2 * UNITS_PER_WORD)));
4125 mem = gen_rtx_MEM (DFmode, fpregs);
4126 MEM_ALIAS_SET (mem) = alias_set;
4127 emit_move_insn (mem,
4128 gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno));
4129 }
4130 regno = first_floatreg;
4131 if (regno & 1)
4132 {
4133 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
4134 mem = gen_rtx_MEM (SFmode, fpregs);
4135 MEM_ALIAS_SET (mem) = alias_set;
4136 emit_move_insn (mem,
4137 gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno
4138 - (TARGET_LITTLE_ENDIAN != 0)));
4139 }
4140 }
4141 else
4142 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
4143 {
4144 rtx mem;
4145 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
4146 mem = gen_rtx_MEM (SFmode, fpregs);
4147 MEM_ALIAS_SET (mem) = alias_set;
4148 emit_move_insn (mem,
4149 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
4150 }
4151
4152 /* Return the address of the regbuf. */
4153 return XEXP (regbuf, 0);
4154 }
4155
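/* Resulting layout of REGBUF, for reference: the float save area sits
   at offset 0 and the int save area just above it, because the float
   regs are stored with predecrement from FPREGS, which starts at
   REGBUF + n_floatregs * UNITS_PER_WORD:

     regbuf + 0:                              float arg regs
     regbuf + n_floatregs * UNITS_PER_WORD:   int arg regs  */
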
4156 /* Define the `__builtin_va_list' type for the ABI. */
4157
4158 tree
4159 sh_build_va_list ()
4160 {
4161 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
4162 tree record;
4163
4164 if ((! TARGET_SH3E && ! TARGET_SH4) || TARGET_HITACHI)
4165 return ptr_type_node;
4166
4167 record = make_node (RECORD_TYPE);
4168
4169 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
4170 ptr_type_node);
4171 f_next_o_limit = build_decl (FIELD_DECL,
4172 get_identifier ("__va_next_o_limit"),
4173 ptr_type_node);
4174 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
4175 ptr_type_node);
4176 f_next_fp_limit = build_decl (FIELD_DECL,
4177 get_identifier ("__va_next_fp_limit"),
4178 ptr_type_node);
4179 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
4180 ptr_type_node);
4181
4182 DECL_FIELD_CONTEXT (f_next_o) = record;
4183 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
4184 DECL_FIELD_CONTEXT (f_next_fp) = record;
4185 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
4186 DECL_FIELD_CONTEXT (f_next_stack) = record;
4187
4188 TYPE_FIELDS (record) = f_next_o;
4189 TREE_CHAIN (f_next_o) = f_next_o_limit;
4190 TREE_CHAIN (f_next_o_limit) = f_next_fp;
4191 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
4192 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
4193
4194 layout_type (record);
4195
4196 return record;
4197 }
4198
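#if 0
/* A sketch of the equivalent C declaration of the record built above
   (illustrative only; the real type is constructed as trees).  */
struct __va_list
{
  void *__va_next_o;        /* next integer arg in the reg-save area */
  void *__va_next_o_limit;  /* end of the integer reg-save area */
  void *__va_next_fp;       /* next float arg in the reg-save area */
  void *__va_next_fp_limit; /* end of the float reg-save area */
  void *__va_next_stack;    /* next argument passed on the stack */
};
#endif
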
4199 /* Implement `va_start' for varargs and stdarg. */
4200
4201 void
4202 sh_va_start (stdarg_p, valist, nextarg)
4203 int stdarg_p;
4204 tree valist;
4205 rtx nextarg;
4206 {
4207 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
4208 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
4209 tree t, u;
4210 int nfp, nint;
4211
4212 if ((! TARGET_SH3E && ! TARGET_SH4) || TARGET_HITACHI)
4213 {
4214 std_expand_builtin_va_start (stdarg_p, valist, nextarg);
4215 return;
4216 }
4217
4218 f_next_o = TYPE_FIELDS (va_list_type_node);
4219 f_next_o_limit = TREE_CHAIN (f_next_o);
4220 f_next_fp = TREE_CHAIN (f_next_o_limit);
4221 f_next_fp_limit = TREE_CHAIN (f_next_fp);
4222 f_next_stack = TREE_CHAIN (f_next_fp_limit);
4223
4224 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
4225 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
4226 valist, f_next_o_limit);
4227 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp);
4228 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
4229 valist, f_next_fp_limit);
4230 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
4231 valist, f_next_stack);
4232
4233 /* Call __builtin_saveregs. */
4234 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
4235 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
4236 TREE_SIDE_EFFECTS (t) = 1;
4237 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4238
4239 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
4240 if (nfp < 8)
4241 nfp = 8 - nfp;
4242 else
4243 nfp = 0;
4244 u = fold (build (PLUS_EXPR, ptr_type_node, u,
4245 build_int_2 (UNITS_PER_WORD * nfp, 0)));
4246 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
4247 TREE_SIDE_EFFECTS (t) = 1;
4248 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4249
4250 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
4251 TREE_SIDE_EFFECTS (t) = 1;
4252 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4253
4254 nint = current_function_args_info.arg_count[SH_ARG_INT];
4255 if (nint < 4)
4256 nint = 4 - nint;
4257 else
4258 nint = 0;
4259 u = fold (build (PLUS_EXPR, ptr_type_node, u,
4260 build_int_2 (UNITS_PER_WORD * nint, 0)));
4261 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
4262 TREE_SIDE_EFFECTS (t) = 1;
4263 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4264
4265 u = make_tree (ptr_type_node, nextarg);
4266 if (! stdarg_p && (nint == 0 || nfp == 0))
4267 {
4268 u = fold (build (PLUS_EXPR, ptr_type_node, u,
4269 build_int_2 (-UNITS_PER_WORD, -1)));
4270 }
4271 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
4272 TREE_SIDE_EFFECTS (t) = 1;
4273 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4274 }
4275
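/* A sketch of what the expansion above computes, where NINT and NFP
   stand for the number of unnamed int and float argument registers
   (illustrative, not literal code):

     next_fp       = __builtin_saveregs ();
     next_fp_limit = next_fp + NFP * UNITS_PER_WORD;
     next_o        = next_fp_limit;
     next_o_limit  = next_o + NINT * UNITS_PER_WORD;
     next_stack    = nextarg;    (backed up one word for non-stdarg
                                  functions when either register class
                                  is exhausted by named args)  */
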
4276 /* Implement `va_arg'. */
4277
4278 rtx
4279 sh_va_arg (valist, type)
4280 tree valist, type;
4281 {
4282 HOST_WIDE_INT size, rsize;
4283 tree tmp, pptr_type_node;
4284 rtx addr_rtx, r;
4285
4286 size = int_size_in_bytes (type);
4287 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
4288 pptr_type_node = build_pointer_type (ptr_type_node);
4289
4290 if ((TARGET_SH3E || TARGET_SH4) && ! TARGET_HITACHI)
4291 {
4292 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
4293 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
4294 int pass_as_float;
4295 rtx lab_false, lab_over;
4296
4297 f_next_o = TYPE_FIELDS (va_list_type_node);
4298 f_next_o_limit = TREE_CHAIN (f_next_o);
4299 f_next_fp = TREE_CHAIN (f_next_o_limit);
4300 f_next_fp_limit = TREE_CHAIN (f_next_fp);
4301 f_next_stack = TREE_CHAIN (f_next_fp_limit);
4302
4303 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
4304 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
4305 valist, f_next_o_limit);
4306 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
4307 valist, f_next_fp);
4308 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
4309 valist, f_next_fp_limit);
4310 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
4311 valist, f_next_stack);
4312
4313 if (TARGET_SH4)
4314 {
4315 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
4316 || (TREE_CODE (type) == COMPLEX_TYPE
4317 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
4318 && size <= 16));
4319 }
4320 else
4321 {
4322 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
4323 }
4324
4325 addr_rtx = gen_reg_rtx (Pmode);
4326 lab_false = gen_label_rtx ();
4327 lab_over = gen_label_rtx ();
4328
4329 if (pass_as_float)
4330 {
4331 emit_cmp_and_jump_insns (expand_expr (next_fp, NULL_RTX, Pmode,
4332 EXPAND_NORMAL),
4333 expand_expr (next_fp_limit, NULL_RTX,
4334 Pmode, EXPAND_NORMAL),
4335 GE, const1_rtx, Pmode, 1, 1, lab_false);
4336
4337 if (TYPE_ALIGN (type) > BITS_PER_WORD)
4338 {
4339 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp,
4340 build_int_2 (UNITS_PER_WORD, 0));
4341 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
4342 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
4343 TREE_SIDE_EFFECTS (tmp) = 1;
4344 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
4345 }
4346
4347 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
4348 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
4349 if (r != addr_rtx)
4350 emit_move_insn (addr_rtx, r);
4351
4352 emit_jump_insn (gen_jump (lab_over));
4353 emit_barrier ();
4354 emit_label (lab_false);
4355
4356 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
4357 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
4358 if (r != addr_rtx)
4359 emit_move_insn (addr_rtx, r);
4360 }
4361 else
4362 {
4363 tmp = build (PLUS_EXPR, ptr_type_node, next_o,
4364 build_int_2 (rsize, 0));
4365
4366 emit_cmp_and_jump_insns (expand_expr (tmp, NULL_RTX, Pmode,
4367 EXPAND_NORMAL),
4368 expand_expr (next_o_limit, NULL_RTX,
4369 Pmode, EXPAND_NORMAL),
4370 GT, const1_rtx, Pmode, 1, 1, lab_false);
4371
4372 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
4373 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
4374 if (r != addr_rtx)
4375 emit_move_insn (addr_rtx, r);
4376
4377 emit_jump_insn (gen_jump (lab_over));
4378 emit_barrier ();
4379 emit_label (lab_false);
4380
4381 if (size > 4 && ! TARGET_SH4)
4382 {
4383 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
4384 TREE_SIDE_EFFECTS (tmp) = 1;
4385 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
4386 }
4387
4388 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
4389 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
4390 if (r != addr_rtx)
4391 emit_move_insn (addr_rtx, r);
4392 }
4393
4394 emit_label (lab_over);
4395
4396 tmp = make_tree (pptr_type_node, addr_rtx);
4397 valist = build1 (INDIRECT_REF, ptr_type_node, tmp);
4398 }
4399
4400 /* ??? In va-sh.h, there had been code to make values larger than
4401 size 8 indirect. This does not match the FUNCTION_ARG macros. */
4402
4403 return std_expand_builtin_va_arg (valist, type);
4404 }
4405
4406 /* Define the offset between two registers, one to be eliminated, and
4407 the other its replacement, at the start of a routine. */
4408
4409 int
4410 initial_elimination_offset (from, to)
4411 int from;
4412 int to;
4413 {
4414 int regs_saved;
4415 int total_saved_regs_space;
4416 int total_auto_space;
4417 int save_flags = target_flags;
4418
4419 int live_regs_mask, live_regs_mask2;
4420 live_regs_mask = calc_live_regs (&regs_saved, &live_regs_mask2);
4421 total_auto_space = rounded_frame_size (regs_saved);
4422 target_flags = save_flags;
4423
4424 total_saved_regs_space = regs_saved;
4425
4426 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
4427 return total_saved_regs_space + total_auto_space;
4428
4429 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4430 return total_saved_regs_space + total_auto_space;
4431
4432 /* Initial gap between fp and sp is 0. */
4433 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4434 return 0;
4435
4436 if (from == RETURN_ADDRESS_POINTER_REGNUM
4437 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
4438 {
4439 int i, n = total_saved_regs_space;
4440 for (i = PR_REG-1; i >= 0; i--)
4441 if (live_regs_mask & (1 << i))
4442 n -= 4;
4443 return n + total_auto_space;
4444 }
4445
4446 abort ();
4447 }
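
/* A sketch of the frame layout these offsets describe (the stack
   grows downward):

     incoming and pretend args    <- arg pointer
     saved registers              (total_saved_regs_space bytes)
     local frame                  (total_auto_space bytes)
                                  <- frame pointer == stack pointer  */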
4448 \f
4449 /* Handle machine specific pragmas to be semi-compatible with Hitachi
4450 compiler. */
4451
4452 void
4453 sh_pr_interrupt (pfile)
4454 cpp_reader *pfile ATTRIBUTE_UNUSED;
4455 {
4456 pragma_interrupt = 1;
4457 }
4458
4459 void
4460 sh_pr_trapa (pfile)
4461 cpp_reader *pfile ATTRIBUTE_UNUSED;
4462 {
4463 pragma_interrupt = pragma_trapa = 1;
4464 }
4465
4466 void
4467 sh_pr_nosave_low_regs (pfile)
4468 cpp_reader *pfile ATTRIBUTE_UNUSED;
4469 {
4470 pragma_nosave_low_regs = 1;
4471 }
4472
4473 /* Generate 'handle_interrupt' attribute for decls */
4474
4475 void
4476 sh_pragma_insert_attributes (node, attributes, prefix)
4477 tree node;
4478 tree * attributes;
4479 tree * prefix ATTRIBUTE_UNUSED;
4480 {
4481 if (! pragma_interrupt
4482 || TREE_CODE (node) != FUNCTION_DECL)
4483 return;
4484
4485 /* We are only interested in declarations. */
4486 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
4487 return;
4488
4489 /* Add a 'handle_interrupt' attribute. */
4490 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
4491
4492 return;
4493 }
4494
4495 /* Return nonzero if ATTR is a valid attribute for DECL.
4496 ATTRIBUTES are any existing attributes and ARGS are the arguments
4497 supplied with ATTR.
4498
4499 Supported attributes:
4500
4501 interrupt_handler -- specifies this function is an interrupt handler.
4502
4503 sp_switch -- specifies an alternate stack for an interrupt handler
4504 to run on.
4505
4506 trap_exit -- use a trapa to exit an interrupt function instead of
4507 an rte instruction. */
4508
4509 int
4510 sh_valid_machine_decl_attribute (decl, attributes, attr, args)
4511 tree decl;
4512 tree attributes ATTRIBUTE_UNUSED;
4513 tree attr;
4514 tree args;
4515 {
4516 if (TREE_CODE (decl) != FUNCTION_DECL)
4517 return 0;
4518
4519 if (is_attribute_p ("interrupt_handler", attr))
4520 {
4521 return 1;
4522 }
4523
4524 if (is_attribute_p ("sp_switch", attr))
4525 {
4526 /* The sp_switch attribute only has meaning for interrupt functions. */
4527 if (!pragma_interrupt)
4528 return 0;
4529
4530 /* sp_switch must have an argument. */
4531 if (!args || TREE_CODE (args) != TREE_LIST)
4532 return 0;
4533
4534 /* The argument must be a constant string. */
4535 if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
4536 return 0;
4537
4538 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
4539 TREE_STRING_POINTER (TREE_VALUE (args)));
4540 return 1;
4541 }
4542
4543 if (is_attribute_p ("trap_exit", attr))
4544 {
4545 /* The trap_exit attribute only has meaning for interrupt functions. */
4546 if (!pragma_interrupt)
4547 return 0;
4548
4549 /* trap_exit must have an argument. */
4550 if (!args || TREE_CODE (args) != TREE_LIST)
4551 return 0;
4552
4553 /* The argument must be a constant integer. */
4554 if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
4555 return 0;
4556
4557 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
4558 return 1;
4559 }
4560
4561 return 0;
4562 }
4563
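/* Usage sketch (illustrative only; "alt_stack" is a hypothetical
   variable holding the address of the alternate stack):

     #pragma interrupt
     void handler (void)
       __attribute__ ((sp_switch ("alt_stack"), trap_exit (4)));

   The interrupt_handler attribute itself needs no preceding pragma:

     void handler2 (void) __attribute__ ((interrupt_handler));  */
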
4564 \f
4565 /* Predicates used by the templates. */
4566
4567 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
4568 Used only in general_movsrc_operand. */
4569
4570 int
4571 system_reg_operand (op, mode)
4572 rtx op;
4573 enum machine_mode mode ATTRIBUTE_UNUSED;
4574 {
4575 switch (REGNO (op))
4576 {
4577 case PR_REG:
4578 case MACL_REG:
4579 case MACH_REG:
4580 return 1;
4581 }
4582 return 0;
4583 }
4584
4585 /* Returns 1 if OP can be source of a simple move operation.
4586 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
4587 invalid as are subregs of system registers. */
4588
4589 int
4590 general_movsrc_operand (op, mode)
4591 rtx op;
4592 enum machine_mode mode;
4593 {
4594 if (GET_CODE (op) == MEM)
4595 {
4596 rtx inside = XEXP (op, 0);
4597 if (GET_CODE (inside) == CONST)
4598 inside = XEXP (inside, 0);
4599
4600 if (GET_CODE (inside) == LABEL_REF)
4601 return 1;
4602
4603 if (GET_CODE (inside) == PLUS
4604 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
4605 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
4606 return 1;
4607
4608 /* Only post inc allowed. */
4609 if (GET_CODE (inside) == PRE_DEC)
4610 return 0;
4611 }
4612
4613 if ((mode == QImode || mode == HImode)
4614 && (GET_CODE (op) == SUBREG
4615 && GET_CODE (XEXP (op, 0)) == REG
4616 && system_reg_operand (XEXP (op, 0), mode)))
4617 return 0;
4618
4619 return general_operand (op, mode);
4620 }
4621
4622 /* Returns 1 if OP can be a destination of a move.
4623 Same as general_operand, but no preinc allowed. */
4624
4625 int
4626 general_movdst_operand (op, mode)
4627 rtx op;
4628 enum machine_mode mode;
4629 {
4630 /* Only pre dec allowed. */
4631 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
4632 return 0;
4633
4634 return general_operand (op, mode);
4635 }
4636
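/* These predicates are referenced from the machine description; a
   sketch of how a move pattern might use them (not the literal sh.md
   text):

     (define_insn "movsi_sketch"
       [(set (match_operand:SI 0 "general_movdst_operand" "")
             (match_operand:SI 1 "general_movsrc_operand" ""))]
       "" "...")  */
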
4637 /* Returns 1 if OP is a normal arithmetic register. */
4638
4639 int
4640 arith_reg_operand (op, mode)
4641 rtx op;
4642 enum machine_mode mode;
4643 {
4644 if (register_operand (op, mode))
4645 {
4646 int regno;
4647
4648 if (GET_CODE (op) == REG)
4649 regno = REGNO (op);
4650 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
4651 regno = REGNO (SUBREG_REG (op));
4652 else
4653 return 1;
4654
4655 return (regno != T_REG && regno != PR_REG
4656 && (regno != FPUL_REG || TARGET_SH4)
4657 && regno != MACH_REG && regno != MACL_REG);
4658 }
4659 return 0;
4660 }
4661
4662 int
4663 fp_arith_reg_operand (op, mode)
4664 rtx op;
4665 enum machine_mode mode;
4666 {
4667 if (register_operand (op, mode))
4668 {
4669 int regno;
4670
4671 if (GET_CODE (op) == REG)
4672 regno = REGNO (op);
4673 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
4674 regno = REGNO (SUBREG_REG (op));
4675 else
4676 return 1;
4677
4678 return (regno >= FIRST_PSEUDO_REGISTER
4679 || FP_REGISTER_P (regno));
4680 }
4681 return 0;
4682 }
4683
4684 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
4685
4686 int
4687 arith_operand (op, mode)
4688 rtx op;
4689 enum machine_mode mode;
4690 {
4691 if (arith_reg_operand (op, mode))
4692 return 1;
4693
4694 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
4695 return 1;
4696
4697 return 0;
4698 }
4699
4700 /* Returns 1 if OP is a valid source operand for a compare insn. */
4701
4702 int
4703 arith_reg_or_0_operand (op, mode)
4704 rtx op;
4705 enum machine_mode mode;
4706 {
4707 if (arith_reg_operand (op, mode))
4708 return 1;
4709
4710 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_N (INTVAL (op)))
4711 return 1;
4712
4713 return 0;
4714 }
4715
4716 /* Returns 1 if OP is a valid source operand for a logical operation. */
4717
4718 int
4719 logical_operand (op, mode)
4720 rtx op;
4721 enum machine_mode mode;
4722 {
4723 if (arith_reg_operand (op, mode))
4724 return 1;
4725
4726 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
4727 return 1;
4728
4729 return 0;
4730 }
4731
4732 /* Nonzero if OP is a floating point value with value 0.0. */
4733
4734 int
4735 fp_zero_operand (op)
4736 rtx op;
4737 {
4738 REAL_VALUE_TYPE r;
4739
4740 if (GET_MODE (op) != SFmode)
4741 return 0;
4742
4743 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
4744 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
4745 }
4746
4747 /* Nonzero if OP is a floating point value with value 1.0. */
4748
4749 int
4750 fp_one_operand (op)
4751 rtx op;
4752 {
4753 REAL_VALUE_TYPE r;
4754
4755 if (GET_MODE (op) != SFmode)
4756 return 0;
4757
4758 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
4759 return REAL_VALUES_EQUAL (r, dconst1);
4760 }
4761
4762 /* For -m4 and -m4-single-only, mode switching is used. If we are
4763 compiling without -mfmovd, movsf_ie isn't taken into account for
4764 mode switching. We could check in machine_dependent_reorg for
4765 cases where we know we are in single precision mode, but there is no
4766 interface to find that out during reload, so we must avoid
4767 choosing an fldi alternative during reload and thus failing to
4768 allocate a scratch register for the constant loading. */
4769 int
4770 fldi_ok ()
4771 {
4772 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
4773 }
4774
4775 int
4776 tertiary_reload_operand (op, mode)
4777 rtx op;
4778 enum machine_mode mode ATTRIBUTE_UNUSED;
4779 {
4780 enum rtx_code code = GET_CODE (op);
4781 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
4782 }
4783
4784 int
4785 fpscr_operand (op, mode)
4786 rtx op;
4787 enum machine_mode mode ATTRIBUTE_UNUSED;
4788 {
4789 return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
4790 && GET_MODE (op) == PSImode);
4791 }
4792
4793 int
4794 fpul_operand (op, mode)
4795 rtx op;
4796 enum machine_mode mode;
4797 {
4798 return (GET_CODE (op) == REG
4799 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
4800 && GET_MODE (op) == mode);
4801 }
4802
4803 int
4804 symbol_ref_operand (op, mode)
4805 rtx op;
4806 enum machine_mode mode ATTRIBUTE_UNUSED;
4807 {
4808 return (GET_CODE (op) == SYMBOL_REF);
4809 }
4810
4811 int
4812 commutative_float_operator (op, mode)
4813 rtx op;
4814 enum machine_mode mode;
4815 {
4816 if (GET_MODE (op) != mode)
4817 return 0;
4818 switch (GET_CODE (op))
4819 {
4820 case PLUS:
4821 case MULT:
4822 return 1;
4823 default:
4824 break;
4825 }
4826 return 0;
4827 }
4828
4829 int
4830 noncommutative_float_operator (op, mode)
4831 rtx op;
4832 enum machine_mode mode;
4833 {
4834 if (GET_MODE (op) != mode)
4835 return 0;
4836 switch (GET_CODE (op))
4837 {
4838 case MINUS:
4839 case DIV:
4840 return 1;
4841 default:
4842 break;
4843 }
4844 return 0;
4845 }
4846
4847 int
4848 binary_float_operator (op, mode)
4849 rtx op;
4850 enum machine_mode mode;
4851 {
4852 if (GET_MODE (op) != mode)
4853 return 0;
4854 switch (GET_CODE (op))
4855 {
4856 case PLUS:
4857 case MINUS:
4858 case MULT:
4859 case DIV:
4860 return 1;
4861 default:
4862 break;
4863 }
4864 return 0;
4865 }
4866 \f
4867 /* Return the destination address of a branch. */
4868
4869 static int
4870 branch_dest (branch)
4871 rtx branch;
4872 {
4873 rtx dest = SET_SRC (PATTERN (branch));
4874 int dest_uid;
4875
4876 if (GET_CODE (dest) == IF_THEN_ELSE)
4877 dest = XEXP (dest, 1);
4878 dest = XEXP (dest, 0);
4879 dest_uid = INSN_UID (dest);
4880 return INSN_ADDRESSES (dest_uid);
4881 }
4882 \f
4883 /* Return non-zero if REG is not used after INSN.
4884 We assume REG is a reload reg, and therefore does
4885 not live past labels. It may live past calls or jumps though. */
4886 int
4887 reg_unused_after (reg, insn)
4888 rtx reg;
4889 rtx insn;
4890 {
4891 enum rtx_code code;
4892 rtx set;
4893
4894 /* If the reg is set by this instruction, then it is safe for our
4895 case. Disregard the case where this is a store to memory, since
4896 we are checking a register used in the store address. */
4897 set = single_set (insn);
4898 if (set && GET_CODE (SET_DEST (set)) != MEM
4899 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
4900 return 1;
4901
4902 while ((insn = NEXT_INSN (insn)))
4903 {
4904 code = GET_CODE (insn);
4905
4906 #if 0
4907 /* If this is a label that existed before reload, then the register
4908 is dead here. However, if this is a label added by reorg, then
4909 the register may still be live here. We can't tell the difference,
4910 so we just ignore labels completely. */
4911 if (code == CODE_LABEL)
4912 return 1;
4913 /* else */
4914 #endif
4915
4916 if (code == JUMP_INSN)
4917 return 0;
4918
4919 /* If this is a sequence, we must handle them all at once.
4920 We could have for instance a call that sets the target register,
4921 and an insn in a delay slot that uses the register. In this case,
4922 we must return 0. */
4923 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4924 {
4925 int i;
4926 int retval = 0;
4927
4928 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
4929 {
4930 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
4931 rtx set = single_set (this_insn);
4932
4933 if (GET_CODE (this_insn) == CALL_INSN)
4934 code = CALL_INSN;
4935 else if (GET_CODE (this_insn) == JUMP_INSN)
4936 {
4937 if (INSN_ANNULLED_BRANCH_P (this_insn))
4938 return 0;
4939 code = JUMP_INSN;
4940 }
4941
4942 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
4943 return 0;
4944 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
4945 {
4946 if (GET_CODE (SET_DEST (set)) != MEM)
4947 retval = 1;
4948 else
4949 return 0;
4950 }
4951 if (set == 0
4952 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
4953 return 0;
4954 }
4955 if (retval == 1)
4956 return 1;
4957 else if (code == JUMP_INSN)
4958 return 0;
4959 }
4960 else if (GET_RTX_CLASS (code) == 'i')
4961 {
4962 rtx set = single_set (insn);
4963
4964 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
4965 return 0;
4966 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
4967 return GET_CODE (SET_DEST (set)) != MEM;
4968 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
4969 return 0;
4970 }
4971
4972 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
4973 return 1;
4974 }
4975 return 1;
4976 }
4977 \f
4978 extern struct obstack permanent_obstack;
4979
4980 rtx
4981 get_fpscr_rtx ()
4982 {
4983 static rtx fpscr_rtx;
4984
4985 if (! fpscr_rtx)
4986 {
4987 fpscr_rtx = gen_rtx (REG, PSImode, FPSCR_REG);
4988 REG_USERVAR_P (fpscr_rtx) = 1;
4989 ggc_add_rtx_root (&fpscr_rtx, 1);
4990 mark_user_reg (fpscr_rtx);
4991 }
4992 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
4993 mark_user_reg (fpscr_rtx);
4994 return fpscr_rtx;
4995 }
4996
4997 void
4998 emit_sf_insn (pat)
4999 rtx pat;
5000 {
5001 emit_insn (pat);
5002 }
5003
5004 void
5005 emit_df_insn (pat)
5006 rtx pat;
5007 {
5008 emit_insn (pat);
5009 }
5010
5011 void
5012 expand_sf_unop (fun, operands)
5013 rtx (*fun) PARAMS ((rtx, rtx, rtx));
5014 rtx *operands;
5015 {
5016 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
5017 }
5018
5019 void
5020 expand_sf_binop (fun, operands)
5021 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
5022 rtx *operands;
5023 {
5024 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
5025 get_fpscr_rtx ()));
5026 }
5027
5028 void
5029 expand_df_unop (fun, operands)
5030 rtx (*fun) PARAMS ((rtx, rtx, rtx));
5031 rtx *operands;
5032 {
5033 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
5034 }
5035
5036 void
5037 expand_df_binop (fun, operands)
5038 rtx (*fun) PARAMS ((rtx, rtx, rtx, rtx));
5039 rtx *operands;
5040 {
5041 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
5042 get_fpscr_rtx ()));
5043 }
5044 \f
5045 /* ??? gcc does flow analysis strictly after common subexpression
5046 elimination. As a result, common subexpression elimination fails
5047 when there are some intervening statements setting the same register.
5048 If we did nothing about this, this would hurt the precision switching
5049 for SH4 badly. There is some cse after reload, but it is unable to
5050 undo the extra register pressure from the unused instructions, and
5051 it cannot remove auto-increment loads.
5052
5053 A C code example that shows this flow/cse weakness for (at least) SH
5054 and sparc (as of gcc ss-970706) is this:
5055
5056 double
5057 f(double a)
5058 {
5059 double d;
5060 d = 0.1;
5061 a += d;
5062 d = 1.1;
5063 d = 0.1;
5064 a *= d;
5065 return a;
5066 }
5067
5068 So we add another pass before common subexpression elimination, to
5069 remove assignments that are dead due to a following assignment in the
5070 same basic block. */
5071
5072 static void
5073 mark_use (x, reg_set_block)
5074 rtx x, *reg_set_block;
5075 {
5076 enum rtx_code code;
5077
5078 if (! x)
5079 return;
5080 code = GET_CODE (x);
5081 switch (code)
5082 {
5083 case REG:
5084 {
5085 int regno = REGNO (x);
5086 int nregs = (regno < FIRST_PSEUDO_REGISTER
5087 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
5088 : 1);
5089 do
5090 {
5091 reg_set_block[regno + nregs - 1] = 0;
5092 }
5093 while (--nregs);
5094 break;
5095 }
5096 case SET:
5097 {
5098 rtx dest = SET_DEST (x);
5099
5100 if (GET_CODE (dest) == SUBREG)
5101 dest = SUBREG_REG (dest);
5102 if (GET_CODE (dest) != REG)
5103 mark_use (dest, reg_set_block);
5104 mark_use (SET_SRC (x), reg_set_block);
5105 break;
5106 }
5107 case CLOBBER:
5108 break;
5109 default:
5110 {
5111 const char *fmt = GET_RTX_FORMAT (code);
5112 int i, j;
5113 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5114 {
5115 if (fmt[i] == 'e')
5116 mark_use (XEXP (x, i), reg_set_block);
5117 else if (fmt[i] == 'E')
5118 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5119 mark_use (XVECEXP (x, i, j), reg_set_block);
5120 }
5121 break;
5122 }
5123 }
5124 }
5125 \f
5126 static rtx get_free_reg PARAMS ((HARD_REG_SET));
5127
5128 /* This function returns a register to use to load the address from which
5129 the fpscr is loaded. Currently it always returns r1 or r7, but when we are
5130 able to use pseudo registers after combine, or have a better mechanism
5131 for choosing a register, it should be done here. */
5132 /* REGS_LIVE is the liveness information for the point for which we
5133 need this allocation. In some bare-bones exit blocks, r1 is live at the
5134 start. We can even have all of r0..r3 being live:
5135 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
5136 The insn before which the new insns are placed will clobber the register
5137 we return. If a basic block consists only of setting the return value
5138 register to a pseudo and using that register, the return value is not
5139 live before or after this block, yet we'll insert our insns right in
5140 the middle. */
5141
5142 static rtx
5143 get_free_reg (regs_live)
5144 HARD_REG_SET regs_live;
5145 {
5146 rtx reg;
5147
5148 if (! TEST_HARD_REG_BIT (regs_live, 1))
5149 return gen_rtx_REG (Pmode, 1);
5150
5151 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
5152 there shouldn't be anything but a jump before the function end. */
5153 if (! TEST_HARD_REG_BIT (regs_live, 7))
5154 return gen_rtx_REG (Pmode, 7);
5155
5156 abort ();
5157 }
5158
5159 /* This function will set the fpscr from memory.
5160 MODE is the mode we are setting it to. */
5161 void
5162 fpscr_set_from_mem (mode, regs_live)
5163 int mode;
5164 HARD_REG_SET regs_live;
5165 {
5166 enum attr_fp_mode fp_mode = mode;
5167 rtx addr_reg = get_free_reg (regs_live);
5168
5169 emit_insn ((fp_mode == (TARGET_FPU_SINGLE ? FP_MODE_SINGLE : FP_MODE_DOUBLE)
5170 ? gen_fpu_switch1 : gen_fpu_switch0) (addr_reg));
5171 }
5172
5173 /* Is the given character a logical line separator for the assembler? */
5174 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
5175 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
5176 #endif
5177
5178 int
5179 sh_insn_length_adjustment (insn)
5180 rtx insn;
5181 {
5182 /* Instructions with unfilled delay slots take up an extra two bytes for
5183 the nop in the delay slot. */
5184 if (((GET_CODE (insn) == INSN
5185 && GET_CODE (PATTERN (insn)) != USE
5186 && GET_CODE (PATTERN (insn)) != CLOBBER)
5187 || GET_CODE (insn) == CALL_INSN
5188 || (GET_CODE (insn) == JUMP_INSN
5189 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
5190 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
5191 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
5192 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
5193 return 2;
5194
5195 /* sh-dsp parallel processing insns take four bytes instead of two. */
5196
5197 if (GET_CODE (insn) == INSN)
5198 {
5199 int sum = 0;
5200 rtx body = PATTERN (insn);
5201 const char *template;
5202 char c;
5203 int maybe_label = 1;
5204
5205 if (GET_CODE (body) == ASM_INPUT)
5206 template = XSTR (body, 0);
5207 else if (asm_noperands (body) >= 0)
5208 template
5209 = decode_asm_operands (body, NULL_PTR, NULL_PTR, NULL_PTR, NULL_PTR);
5210 else
5211 return 0;
5212 do
5213 {
5214 int ppi_adjust = 0;
5215
5216 do
5217 c = *template++;
5218 while (c == ' ' || c == '\t');
5219 /* all sh-dsp parallel-processing insns start with p.
5220 The only non-ppi sh insn starting with p is pref.
5221 The only ppi starting with pr is prnd. */
5222 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
5223 ppi_adjust = 2;
5224 /* The repeat pseudo-insn expands to three insns, a total of
5225 six bytes in size. */
5226 else if ((c == 'r' || c == 'R')
5227 && ! strncasecmp ("epeat", template, 5))
5228 ppi_adjust = 4;
5229 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
5230 {
5231 /* If this is a label, it is obviously not a ppi insn. */
5232 if (c == ':' && maybe_label)
5233 {
5234 ppi_adjust = 0;
5235 break;
5236 }
5237 else if (c == '\'' || c == '"')
5238 maybe_label = 0;
5239 c = *template++;
5240 }
5241 sum += ppi_adjust;
5242 maybe_label = c != ':';
5243 }
5244 while (c);
5245 return sum;
5246 }
5247 return 0;
5248 }
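
/* Sketch of the adjustment above applied to inline assembly
   ("padd" is an SH-DSP parallel-processing insn, used here purely
   for illustration):

     asm ("padd x0,y0,a0");   2 normal bytes + 2 extra == 4 bytes
     asm ("pref @r1");        starts with "pre", so no extra bytes

   A "repeat" pseudo-insn line is charged 4 extra bytes, since it
   expands to three 2-byte insns.  */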
5249 \f
5250 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
5251 isn't protected by a PIC unspec. */
5252 int
5253 nonpic_symbol_mentioned_p (x)
5254 rtx x;
5255 {
5256 register const char *fmt;
5257 register int i;
5258
5259 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
5260 return 1;
5261
5262 if (GET_CODE (x) == UNSPEC
5263 && (XINT (x, 1) == UNSPEC_PIC
5264 || XINT (x, 1) == UNSPEC_GOT
5265 || XINT (x, 1) == UNSPEC_GOTOFF
5266 || XINT (x, 1) == UNSPEC_PLT))
5267 return 0;
5268
5269 fmt = GET_RTX_FORMAT (GET_CODE (x));
5270 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5271 {
5272 if (fmt[i] == 'E')
5273 {
5274 register int j;
5275
5276 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5277 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
5278 return 1;
5279 }
5280 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
5281 return 1;
5282 }
5283
5284 return 0;
5285 }
5286
5287 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
5288 @GOTOFF in `reg'. */
5289 rtx
5290 legitimize_pic_address (orig, mode, reg)
5291 rtx orig;
5292 enum machine_mode mode;
5293 rtx reg;
5294 {
5295 if (GET_CODE (orig) == LABEL_REF
5296 || (GET_CODE (orig) == SYMBOL_REF
5297 && (CONSTANT_POOL_ADDRESS_P (orig)
5298 /* SYMBOL_REF_FLAG is set on static symbols. */
5299 || SYMBOL_REF_FLAG (orig))))
5300 {
5301 if (reg == 0)
5302 reg = gen_reg_rtx (Pmode);
5303
5304 emit_insn (gen_symGOTOFF2reg (reg, orig));
5305 return reg;
5306 }
5307 else if (GET_CODE (orig) == SYMBOL_REF)
5308 {
5309 if (reg == 0)
5310 reg = gen_reg_rtx (Pmode);
5311
5312 emit_insn (gen_symGOT2reg (reg, orig));
5313 return reg;
5314 }
5315 return orig;
5316 }