]>
Commit | Line | Data |
---|---|---|
16bea517 | 1 | /* Output routines for GCC for Hitachi Super-H. |
c5c76735 JL |
2 | Copyright (C) 1993-1998, 1999 Free Software Foundation, Inc. |
3 | Contributed by Steve Chamberlain (sac@cygnus.com). | |
4 | Improved by Jim Wilson (wilson@cygnus.com). | |
bc45ade3 | 5 | |
c15c9075 RK |
6 | This file is part of GNU CC. |
7 | ||
8 | GNU CC is free software; you can redistribute it and/or modify | |
9 | it under the terms of the GNU General Public License as published by | |
10 | the Free Software Foundation; either version 2, or (at your option) | |
11 | any later version. | |
12 | ||
13 | GNU CC is distributed in the hope that it will be useful, | |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | GNU General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with GNU CC; see the file COPYING. If not, write to | |
20 | the Free Software Foundation, 59 Temple Place - Suite 330, | |
21 | Boston, MA 02111-1307, USA. */ | |
bc45ade3 | 22 | |
bc45ade3 | 23 | #include "config.h" |
c5c76735 | 24 | #include "system.h" |
bc45ade3 | 25 | #include "rtl.h" |
bc45ade3 | 26 | #include "tree.h" |
bc45ade3 | 27 | #include "flags.h" |
8aa2a305 | 28 | #include "insn-flags.h" |
bc45ade3 | 29 | #include "expr.h" |
49ad7cfa | 30 | #include "function.h" |
8aa2a305 JW |
31 | #include "regs.h" |
32 | #include "hard-reg-set.h" | |
33 | #include "output.h" | |
956a5206 | 34 | #include "insn-attr.h" |
bc45ade3 | 35 | |
1245df60 R |
36 | int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch; |
37 | ||
00f8ff66 SC |
38 | #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0) |
39 | #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1) | |
40 | ||
20b04867 | 41 | /* ??? The pragma interrupt support will not work for SH3. */ |
8aa2a305 JW |
42 | /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to |
43 | output code for the next function appropriate for an interrupt handler. */ | |
0d7e008e | 44 | int pragma_interrupt; |
bc45ade3 | 45 | |
4408efce JL |
46 | /* This is set by the trap_exit attribute for functions. It specifies |
47 | a trap number to be used in a trapa instruction at function exit | |
48 | (instead of an rte instruction). */ | |
49 | int trap_exit; | |
50 | ||
51 | /* This is used by the sp_switch attribute for functions. It specifies | |
52 | a variable holding the address of the stack the interrupt function | |
53 | should switch to/from at entry/exit. */ | |
54 | rtx sp_switch; | |
55 | ||
8aa2a305 JW |
56 | /* This is set by #pragma trapa, and is similar to the above, except that |
57 | the compiler doesn't emit code to preserve all registers. */ | |
58 | static int pragma_trapa; | |
59 | ||
956a5206 JW |
60 | /* This is set by #pragma nosave_low_regs. This is useful on the SH3, |
61 | which has a separate set of low regs for User and Supervisor modes. | |
62 | This should only be used for the lowest level of interrupts. Higher levels | |
63 | of interrupts must save the registers in case they themselves are | |
64 | interrupted. */ | |
65 | int pragma_nosave_low_regs; | |
66 | ||
8aa2a305 JW |
67 | /* This is used for communication between SETUP_INCOMING_VARARGS and |
68 | sh_expand_prologue. */ | |
bc45ade3 | 69 | int current_function_anonymous_args; |
16bea517 | 70 | |
8aa2a305 JW |
71 | /* Global variables from toplev.c and final.c that are used within, but |
72 | not declared in any header file. */ | |
b9654711 | 73 | extern char *version_string; |
8aa2a305 | 74 | extern int *insn_addresses; |
00f8ff66 | 75 | |
bc45ade3 SC |
76 | /* Global variables for machine-dependent things. */ |
77 | ||
16bea517 JW |
78 | /* Which cpu are we scheduling for. */ |
79 | enum processor_type sh_cpu; | |
80 | ||
bc45ade3 | 81 | /* Saved operands from the last compare to use when we generate an scc |
16bea517 | 82 | or bcc insn. */ |
bc45ade3 SC |
83 | |
84 | rtx sh_compare_op0; | |
85 | rtx sh_compare_op1; | |
86 | ||
1245df60 | 87 | enum machine_mode sh_addr_diff_vec_mode; |
1245df60 | 88 | |
bc45ade3 | 89 | /* Provides the class number of the smallest class containing |
16bea517 | 90 | reg number. */ |
bc45ade3 SC |
91 | |
92 | int regno_reg_class[FIRST_PSEUDO_REGISTER] = | |
93 | { | |
8e87e161 | 94 | R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
bc45ade3 SC |
95 | GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
96 | GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, | |
97 | GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, | |
8e87e161 | 98 | GENERAL_REGS, PR_REGS, T_REGS, NO_REGS, |
2afeea0f | 99 | MAC_REGS, MAC_REGS, FPUL_REGS, GENERAL_REGS, |
1a95a963 JW |
100 | FP0_REGS,FP_REGS, FP_REGS, FP_REGS, |
101 | FP_REGS, FP_REGS, FP_REGS, FP_REGS, | |
102 | FP_REGS, FP_REGS, FP_REGS, FP_REGS, | |
103 | FP_REGS, FP_REGS, FP_REGS, FP_REGS, | |
225e4f43 R |
104 | DF_REGS, DF_REGS, DF_REGS, DF_REGS, |
105 | DF_REGS, DF_REGS, DF_REGS, DF_REGS, | |
106 | FPSCR_REGS, | |
107 | }; | |
108 | ||
/* Assembler names: sixteen "frN" entries, then "fpul", then the eight
   even-numbered "xdN" entries.  Each slot is 5 bytes, enough for the
   longest name plus its terminating NUL.  */
char fp_reg_names[][5] =
{
  "fr0",  "fr1",  "fr2",  "fr3",
  "fr4",  "fr5",  "fr6",  "fr7",
  "fr8",  "fr9",  "fr10", "fr11",
  "fr12", "fr13", "fr14", "fr15",
  "fpul",
  "xd0",  "xd2",  "xd4",  "xd6",
  "xd8",  "xd10", "xd12", "xd14",
};
116 | ||
117 | /* Provide reg_class from a letter such as appears in the machine | |
16bea517 | 118 | description. */ |
bc45ade3 SC |
119 | |
120 | enum reg_class reg_class_from_letter[] = | |
121 | { | |
225e4f43 | 122 | /* a */ ALL_REGS, /* b */ NO_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS, |
1a95a963 | 123 | /* e */ NO_REGS, /* f */ FP_REGS, /* g */ NO_REGS, /* h */ NO_REGS, |
bc45ade3 SC |
124 | /* i */ NO_REGS, /* j */ NO_REGS, /* k */ NO_REGS, /* l */ PR_REGS, |
125 | /* m */ NO_REGS, /* n */ NO_REGS, /* o */ NO_REGS, /* p */ NO_REGS, | |
126 | /* q */ NO_REGS, /* r */ NO_REGS, /* s */ NO_REGS, /* t */ T_REGS, | |
1a95a963 JW |
127 | /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS, |
128 | /* y */ FPUL_REGS, /* z */ R0_REGS | |
bc45ade3 | 129 | }; |
1245df60 | 130 | |
225e4f43 R |
131 | int assembler_dialect; |
132 | ||
133 | rtx get_fpscr_rtx (); | |
134 | void emit_sf_insn (); | |
135 | void emit_df_insn (); | |
136 | ||
1245df60 | 137 | static void split_branches PROTO ((rtx)); |
b9654711 | 138 | \f |
16bea517 | 139 | /* Print the operand address in x to the stream. */ |
bc45ade3 SC |
140 | |
141 | void | |
142 | print_operand_address (stream, x) | |
143 | FILE *stream; | |
144 | rtx x; | |
145 | { | |
146 | switch (GET_CODE (x)) | |
147 | { | |
148 | case REG: | |
225e4f43 R |
149 | case SUBREG: |
150 | fprintf (stream, "@%s", reg_names[true_regnum (x)]); | |
bc45ade3 | 151 | break; |
8aa2a305 | 152 | |
bc45ade3 SC |
153 | case PLUS: |
154 | { | |
155 | rtx base = XEXP (x, 0); | |
156 | rtx index = XEXP (x, 1); | |
157 | ||
bc45ade3 SC |
158 | switch (GET_CODE (index)) |
159 | { | |
160 | case CONST_INT: | |
8aa2a305 | 161 | fprintf (stream, "@(%d,%s)", INTVAL (index), |
225e4f43 | 162 | reg_names[true_regnum (base)]); |
bc45ade3 SC |
163 | break; |
164 | ||
165 | case REG: | |
225e4f43 R |
166 | case SUBREG: |
167 | { | |
168 | int base_num = true_regnum (base); | |
169 | int index_num = true_regnum (index); | |
170 | ||
171 | fprintf (stream, "@(r0,%s)", | |
172 | reg_names[MAX (base_num, index_num)]); | |
173 | break; | |
174 | } | |
bc45ade3 SC |
175 | |
176 | default: | |
b9654711 | 177 | debug_rtx (x); |
bc45ade3 SC |
178 | abort (); |
179 | } | |
180 | } | |
bc45ade3 | 181 | break; |
8aa2a305 | 182 | |
bc45ade3 | 183 | case PRE_DEC: |
225e4f43 | 184 | fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]); |
bc45ade3 SC |
185 | break; |
186 | ||
187 | case POST_INC: | |
225e4f43 | 188 | fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]); |
bc45ade3 SC |
189 | break; |
190 | ||
191 | default: | |
192 | output_addr_const (stream, x); | |
193 | break; | |
194 | } | |
195 | } | |
196 | ||
197 | /* Print operand x (an rtx) in assembler syntax to file stream | |
198 | according to modifier code. | |
199 | ||
b9654711 | 200 | '.' print a .s if insn needs delay slot |
1245df60 | 201 | ',' print LOCAL_LABEL_PREFIX |
4408efce | 202 | '@' print trap, rte or rts depending upon pragma interruptness |
8aa2a305 JW |
203 | '#' output a nop if there is nothing to put in the delay slot |
204 | 'O' print a constant without the # | |
00f8ff66 | 205 | 'R' print the LSW of a dp value - changes if in little endian |
00f8ff66 | 206 | 'S' print the MSW of a dp value - changes if in little endian |
225e4f43 R |
207 | 'T' print the next word of a dp value - same as 'R' in big endian mode. |
208 | 'o' output an operator. */ | |
bc45ade3 SC |
209 | |
210 | void | |
211 | print_operand (stream, x, code) | |
212 | FILE *stream; | |
213 | rtx x; | |
214 | int code; | |
215 | { | |
216 | switch (code) | |
217 | { | |
b9654711 | 218 | case '.': |
79b2746a JW |
219 | if (final_sequence |
220 | && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))) | |
1245df60 R |
221 | fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s"); |
222 | break; | |
223 | case ',': | |
224 | fprintf (stream, "%s", LOCAL_LABEL_PREFIX); | |
b9654711 | 225 | break; |
d3ae8277 | 226 | case '@': |
157131d7 SC |
227 | { |
228 | int interrupt_handler; | |
229 | ||
230 | if ((lookup_attribute | |
231 | ("interrupt_handler", | |
232 | DECL_MACHINE_ATTRIBUTES (current_function_decl))) | |
233 | != NULL_TREE) | |
234 | interrupt_handler = 1; | |
235 | else | |
236 | interrupt_handler = 0; | |
237 | ||
4408efce JL |
238 | if (trap_exit) |
239 | fprintf (stream, "trapa #%d", trap_exit); | |
157131d7 | 240 | else if (interrupt_handler) |
8e87e161 | 241 | fprintf (stream, "rte"); |
d3ae8277 | 242 | else |
8e87e161 | 243 | fprintf (stream, "rts"); |
d3ae8277 | 244 | break; |
157131d7 | 245 | } |
bc45ade3 | 246 | case '#': |
16bea517 | 247 | /* Output a nop if there's nothing in the delay slot. */ |
bc45ade3 | 248 | if (dbr_sequence_length () == 0) |
8aa2a305 | 249 | fprintf (stream, "\n\tnop"); |
bc45ade3 | 250 | break; |
b9654711 | 251 | case 'O': |
0d7e008e | 252 | output_addr_const (stream, x); |
bc45ade3 | 253 | break; |
bc45ade3 | 254 | case 'R': |
8aa2a305 JW |
255 | fputs (reg_names[REGNO (x) + LSW], (stream)); |
256 | break; | |
257 | case 'S': | |
258 | fputs (reg_names[REGNO (x) + MSW], (stream)); | |
00f8ff66 SC |
259 | break; |
260 | case 'T': | |
16bea517 | 261 | /* Next word of a double. */ |
bc45ade3 SC |
262 | switch (GET_CODE (x)) |
263 | { | |
264 | case REG: | |
265 | fputs (reg_names[REGNO (x) + 1], (stream)); | |
266 | break; | |
267 | case MEM: | |
225e4f43 R |
268 | if (GET_CODE (XEXP (x, 0)) != PRE_DEC |
269 | && GET_CODE (XEXP (x, 0)) != POST_INC) | |
270 | x = adj_offsettable_operand (x, 4); | |
271 | print_operand_address (stream, XEXP (x, 0)); | |
bc45ade3 SC |
272 | break; |
273 | } | |
274 | break; | |
225e4f43 R |
275 | case 'o': |
276 | switch (GET_CODE (x)) | |
277 | { | |
278 | case PLUS: fputs ("add", stream); break; | |
279 | case MINUS: fputs ("sub", stream); break; | |
280 | case MULT: fputs ("mul", stream); break; | |
281 | case DIV: fputs ("div", stream); break; | |
282 | } | |
283 | break; | |
bc45ade3 SC |
284 | default: |
285 | switch (GET_CODE (x)) | |
286 | { | |
287 | case REG: | |
225e4f43 R |
288 | if (REGNO (x) >= FIRST_FP_REG && REGNO (x) <= LAST_FP_REG |
289 | && GET_MODE_SIZE (GET_MODE (x)) > 4) | |
290 | fprintf ((stream), "d%s", reg_names[REGNO (x)]+1); | |
291 | else | |
292 | fputs (reg_names[REGNO (x)], (stream)); | |
bc45ade3 SC |
293 | break; |
294 | case MEM: | |
295 | output_address (XEXP (x, 0)); | |
296 | break; | |
297 | default: | |
298 | fputc ('#', stream); | |
299 | output_addr_const (stream, x); | |
300 | break; | |
bc45ade3 SC |
301 | } |
302 | break; | |
303 | } | |
304 | } | |
bc45ade3 | 305 | \f |
0aa692fc R |
306 | static void force_into PROTO ((rtx, rtx)); |
307 | ||
308 | /* Like force_operand, but guarantees that VALUE ends up in TARGET. */ | |
309 | static void | |
310 | force_into (value, target) | |
311 | rtx value, target; | |
312 | { | |
313 | value = force_operand (value, target); | |
314 | if (! rtx_equal_p (value, target)) | |
315 | emit_insn (gen_move_insn (target, value)); | |
316 | } | |
317 | ||
0d7e008e SC |
318 | /* Emit code to perform a block move. Choose the best method. |
319 | ||
320 | OPERANDS[0] is the destination. | |
321 | OPERANDS[1] is the source. | |
322 | OPERANDS[2] is the size. | |
323 | OPERANDS[3] is the alignment safe to use. */ | |
324 | ||
0d7e008e SC |
325 | int |
326 | expand_block_move (operands) | |
327 | rtx *operands; | |
328 | { | |
329 | int align = INTVAL (operands[3]); | |
330 | int constp = (GET_CODE (operands[2]) == CONST_INT); | |
331 | int bytes = (constp ? INTVAL (operands[2]) : 0); | |
0d7e008e | 332 | |
8aa2a305 JW |
333 | /* If it isn't a constant number of bytes, or if it doesn't have 4 byte |
334 | alignment, or if it isn't a multiple of 4 bytes, then fail. */ | |
335 | if (! constp || align < 4 || (bytes % 4 != 0)) | |
d3ae8277 SC |
336 | return 0; |
337 | ||
225e4f43 R |
338 | if (TARGET_HARD_SH4) |
339 | { | |
340 | if (bytes < 12) | |
341 | return 0; | |
342 | else if (bytes == 12) | |
343 | { | |
344 | tree entry_name; | |
345 | rtx func_addr_rtx; | |
346 | rtx r4 = gen_rtx (REG, SImode, 4); | |
347 | rtx r5 = gen_rtx (REG, SImode, 5); | |
348 | ||
349 | entry_name = get_identifier ("__movstrSI12_i4"); | |
350 | ||
351 | func_addr_rtx | |
352 | = copy_to_mode_reg (Pmode, | |
353 | gen_rtx_SYMBOL_REF (Pmode, | |
354 | IDENTIFIER_POINTER (entry_name))); | |
0aa692fc R |
355 | force_into (XEXP (operands[0], 0), r4); |
356 | force_into (XEXP (operands[1], 0), r5); | |
225e4f43 R |
357 | emit_insn (gen_block_move_real_i4 (func_addr_rtx)); |
358 | return 1; | |
359 | } | |
360 | else if (! TARGET_SMALLCODE) | |
361 | { | |
362 | tree entry_name; | |
363 | rtx func_addr_rtx; | |
364 | int dwords; | |
365 | rtx r4 = gen_rtx (REG, SImode, 4); | |
366 | rtx r5 = gen_rtx (REG, SImode, 5); | |
367 | rtx r6 = gen_rtx (REG, SImode, 6); | |
368 | ||
369 | entry_name = get_identifier (bytes & 4 | |
370 | ? "__movstr_i4_odd" | |
371 | : "__movstr_i4_even"); | |
372 | func_addr_rtx | |
373 | = copy_to_mode_reg (Pmode, | |
374 | gen_rtx_SYMBOL_REF (Pmode, | |
375 | IDENTIFIER_POINTER (entry_name))); | |
0aa692fc R |
376 | force_into (XEXP (operands[0], 0), r4); |
377 | force_into (XEXP (operands[1], 0), r5); | |
225e4f43 R |
378 | |
379 | dwords = bytes >> 3; | |
380 | emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1))); | |
381 | emit_insn (gen_block_lump_real_i4 (func_addr_rtx)); | |
382 | return 1; | |
383 | } | |
384 | else | |
385 | return 0; | |
386 | } | |
8aa2a305 | 387 | if (bytes < 64) |
0d7e008e SC |
388 | { |
389 | char entry[30]; | |
390 | tree entry_name; | |
391 | rtx func_addr_rtx; | |
c5c76735 JL |
392 | rtx r4 = gen_rtx_REG (SImode, 4); |
393 | rtx r5 = gen_rtx_REG (SImode, 5); | |
8aa2a305 JW |
394 | |
395 | sprintf (entry, "__movstrSI%d", bytes); | |
0d7e008e SC |
396 | entry_name = get_identifier (entry); |
397 | ||
8aa2a305 JW |
398 | func_addr_rtx |
399 | = copy_to_mode_reg (Pmode, | |
c5c76735 JL |
400 | gen_rtx_SYMBOL_REF |
401 | (Pmode, IDENTIFIER_POINTER (entry_name))); | |
0aa692fc R |
402 | force_into (XEXP (operands[0], 0), r4); |
403 | force_into (XEXP (operands[1], 0), r5); | |
0d7e008e SC |
404 | emit_insn (gen_block_move_real (func_addr_rtx)); |
405 | return 1; | |
406 | } | |
8aa2a305 JW |
407 | |
408 | /* This is the same number of bytes as a memcpy call, but to a different | |
409 | less common function name, so this will occasionally use more space. */ | |
410 | if (! TARGET_SMALLCODE) | |
0d7e008e | 411 | { |
0d7e008e SC |
412 | tree entry_name; |
413 | rtx func_addr_rtx; | |
8aa2a305 | 414 | int final_switch, while_loop; |
c5c76735 JL |
415 | rtx r4 = gen_rtx_REG (SImode, 4); |
416 | rtx r5 = gen_rtx_REG (SImode, 5); | |
417 | rtx r6 = gen_rtx_REG (SImode, 6); | |
0d7e008e | 418 | |
8aa2a305 JW |
419 | entry_name = get_identifier ("__movstr"); |
420 | func_addr_rtx | |
421 | = copy_to_mode_reg (Pmode, | |
c5c76735 JL |
422 | gen_rtx_SYMBOL_REF |
423 | (Pmode, IDENTIFIER_POINTER (entry_name))); | |
0aa692fc R |
424 | force_into (XEXP (operands[0], 0), r4); |
425 | force_into (XEXP (operands[1], 0), r5); | |
0d7e008e | 426 | |
8aa2a305 JW |
427 | /* r6 controls the size of the move. 16 is decremented from it |
428 | for each 64 bytes moved. Then the negative bit left over is used | |
429 | as an index into a list of move instructions. e.g., a 72 byte move | |
430 | would be set up with size(r6) = 14, for one iteration through the | |
431 | big while loop, and a switch of -2 for the last part. */ | |
0d7e008e | 432 | |
8aa2a305 JW |
433 | final_switch = 16 - ((bytes / 4) % 16); |
434 | while_loop = ((bytes / 4) / 16 - 1) * 16; | |
435 | emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch))); | |
436 | emit_insn (gen_block_lump_real (func_addr_rtx)); | |
437 | return 1; | |
0d7e008e | 438 | } |
0d7e008e | 439 | |
d3ae8277 | 440 | return 0; |
0d7e008e SC |
441 | } |
442 | ||
bc45ade3 | 443 | /* Prepare operands for a move define_expand; specifically, one of the |
8aa2a305 | 444 | operands must be in a register. */ |
bc45ade3 | 445 | |
b9654711 | 446 | int |
bc45ade3 SC |
447 | prepare_move_operands (operands, mode) |
448 | rtx operands[]; | |
449 | enum machine_mode mode; | |
450 | { | |
e22856e8 JW |
451 | if (! reload_in_progress && ! reload_completed) |
452 | { | |
453 | /* Copy the source to a register if both operands aren't registers. */ | |
454 | if (! register_operand (operands[0], mode) | |
455 | && ! register_operand (operands[1], mode)) | |
456 | operands[1] = copy_to_mode_reg (mode, operands[1]); | |
457 | ||
458 | /* This case can happen while generating code to move the result | |
459 | of a library call to the target. Reject `st r0,@(rX,rY)' because | |
460 | reload will fail to find a spill register for rX, since r0 is already | |
461 | being used for the source. */ | |
462 | else if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 0 | |
463 | && GET_CODE (operands[0]) == MEM | |
464 | && GET_CODE (XEXP (operands[0], 0)) == PLUS | |
465 | && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG) | |
466 | operands[1] = copy_to_mode_reg (mode, operands[1]); | |
467 | } | |
8aa2a305 JW |
468 | |
469 | return 0; | |
470 | } | |
471 | ||
472 | /* Prepare the operands for an scc instruction; make sure that the | |
473 | compare has been done. */ | |
474 | rtx | |
475 | prepare_scc_operands (code) | |
476 | enum rtx_code code; | |
477 | { | |
c5c76735 | 478 | rtx t_reg = gen_rtx_REG (SImode, T_REG); |
8aa2a305 | 479 | enum rtx_code oldcode = code; |
9374bd85 | 480 | enum machine_mode mode; |
8aa2a305 JW |
481 | |
482 | /* First need a compare insn. */ | |
483 | switch (code) | |
bc45ade3 | 484 | { |
8aa2a305 JW |
485 | case NE: |
486 | /* It isn't possible to handle this case. */ | |
487 | abort (); | |
488 | case LT: | |
489 | code = GT; | |
490 | break; | |
491 | case LE: | |
492 | code = GE; | |
493 | break; | |
494 | case LTU: | |
495 | code = GTU; | |
496 | break; | |
497 | case LEU: | |
498 | code = GEU; | |
499 | break; | |
bc45ade3 | 500 | } |
8aa2a305 | 501 | if (code != oldcode) |
b9654711 | 502 | { |
8aa2a305 JW |
503 | rtx tmp = sh_compare_op0; |
504 | sh_compare_op0 = sh_compare_op1; | |
505 | sh_compare_op1 = tmp; | |
0d7e008e | 506 | } |
b9654711 | 507 | |
9374bd85 RK |
508 | mode = GET_MODE (sh_compare_op0); |
509 | if (mode == VOIDmode) | |
510 | mode = GET_MODE (sh_compare_op1); | |
511 | ||
512 | sh_compare_op0 = force_reg (mode, sh_compare_op0); | |
1245df60 R |
513 | if ((code != EQ && code != NE |
514 | && (sh_compare_op1 != const0_rtx | |
515 | || code == GTU || code == GEU || code == LTU || code == LEU)) | |
516 | || TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT) | |
9374bd85 | 517 | sh_compare_op1 = force_reg (mode, sh_compare_op1); |
0d7e008e | 518 | |
225e4f43 R |
519 | if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT) |
520 | (mode == SFmode ? emit_sf_insn : emit_df_insn) | |
521 | (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2, | |
522 | gen_rtx (SET, VOIDmode, t_reg, | |
523 | gen_rtx (code, SImode, | |
524 | sh_compare_op0, sh_compare_op1)), | |
525 | gen_rtx (USE, VOIDmode, get_fpscr_rtx ())))); | |
526 | else | |
527 | emit_insn (gen_rtx (SET, VOIDmode, t_reg, | |
528 | gen_rtx (code, SImode, sh_compare_op0, | |
529 | sh_compare_op1))); | |
0d7e008e | 530 | |
8aa2a305 | 531 | return t_reg; |
bc45ade3 SC |
532 | } |
533 | ||
8aa2a305 JW |
534 | /* Called from the md file, set up the operands of a compare instruction. */ |
535 | ||
536 | void | |
537 | from_compare (operands, code) | |
538 | rtx *operands; | |
8e87e161 | 539 | int code; |
bc45ade3 | 540 | { |
1245df60 R |
541 | enum machine_mode mode = GET_MODE (sh_compare_op0); |
542 | rtx insn; | |
543 | if (mode == VOIDmode) | |
544 | mode = GET_MODE (sh_compare_op1); | |
545 | if (code != EQ | |
546 | || mode == DImode | |
547 | || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT)) | |
bc45ade3 | 548 | { |
8aa2a305 | 549 | /* Force args into regs, since we can't use constants here. */ |
24a25d45 | 550 | sh_compare_op0 = force_reg (mode, sh_compare_op0); |
8aa2a305 | 551 | if (sh_compare_op1 != const0_rtx |
1245df60 R |
552 | || code == GTU || code == GEU |
553 | || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT)) | |
24a25d45 | 554 | sh_compare_op1 = force_reg (mode, sh_compare_op1); |
bc45ade3 | 555 | } |
1245df60 R |
556 | if (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE) |
557 | { | |
558 | from_compare (operands, GT); | |
559 | insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1); | |
560 | } | |
561 | else | |
c5c76735 JL |
562 | insn = gen_rtx_SET (VOIDmode, |
563 | gen_rtx_REG (SImode, 18), | |
564 | gen_rtx (code, SImode, sh_compare_op0, | |
565 | sh_compare_op1)); | |
225e4f43 R |
566 | if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT) |
567 | { | |
568 | insn = gen_rtx (PARALLEL, VOIDmode, | |
569 | gen_rtvec (2, insn, | |
570 | gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))); | |
571 | (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn); | |
572 | } | |
573 | else | |
574 | emit_insn (insn); | |
bc45ade3 SC |
575 | } |
576 | \f | |
16bea517 | 577 | /* Functions to output assembly code. */ |
bc45ade3 | 578 | |
b9654711 | 579 | /* Return a sequence of instructions to perform DI or DF move. |
bc45ade3 | 580 | |
b9654711 | 581 | Since the SH cannot move a DI or DF in one instruction, we have |
16bea517 | 582 | to take care when we see overlapping source and dest registers. */ |
0d7e008e | 583 | |
bc45ade3 | 584 | char * |
0d7e008e SC |
585 | output_movedouble (insn, operands, mode) |
586 | rtx insn; | |
bc45ade3 SC |
587 | rtx operands[]; |
588 | enum machine_mode mode; | |
589 | { | |
b9654711 SC |
590 | rtx dst = operands[0]; |
591 | rtx src = operands[1]; | |
b9654711 | 592 | |
0d7e008e | 593 | if (GET_CODE (dst) == MEM |
16bea517 | 594 | && GET_CODE (XEXP (dst, 0)) == PRE_DEC) |
8aa2a305 JW |
595 | return "mov.l %T1,%0\n\tmov.l %1,%0"; |
596 | ||
b9654711 SC |
597 | if (register_operand (dst, mode) |
598 | && register_operand (src, mode)) | |
bc45ade3 | 599 | { |
b9654711 | 600 | if (REGNO (src) == MACH_REG) |
00f8ff66 | 601 | return "sts mach,%S0\n\tsts macl,%R0"; |
bc45ade3 | 602 | |
8aa2a305 JW |
603 | /* When mov.d r1,r2 do r2->r3 then r1->r2; |
604 | when mov.d r1,r0 do r1->r0 then r2->r1. */ | |
b9654711 SC |
605 | |
606 | if (REGNO (src) + 1 == REGNO (dst)) | |
5325c0fa | 607 | return "mov %T1,%T0\n\tmov %1,%0"; |
b9654711 | 608 | else |
5325c0fa | 609 | return "mov %1,%0\n\tmov %T1,%T0"; |
b9654711 SC |
610 | } |
611 | else if (GET_CODE (src) == CONST_INT) | |
bc45ade3 | 612 | { |
8aa2a305 JW |
613 | if (INTVAL (src) < 0) |
614 | output_asm_insn ("mov #-1,%S0", operands); | |
bc45ade3 | 615 | else |
8aa2a305 | 616 | output_asm_insn ("mov #0,%S0", operands); |
bc45ade3 | 617 | |
8aa2a305 | 618 | return "mov %1,%R0"; |
0d7e008e | 619 | } |
b9654711 | 620 | else if (GET_CODE (src) == MEM) |
bc45ade3 | 621 | { |
8aa2a305 | 622 | int ptrreg = -1; |
b9654711 SC |
623 | int dreg = REGNO (dst); |
624 | rtx inside = XEXP (src, 0); | |
bc45ade3 SC |
625 | |
626 | if (GET_CODE (inside) == REG) | |
8aa2a305 | 627 | ptrreg = REGNO (inside); |
79b2746a JW |
628 | else if (GET_CODE (inside) == SUBREG) |
629 | ptrreg = REGNO (SUBREG_REG (inside)) + SUBREG_WORD (inside); | |
bc45ade3 SC |
630 | else if (GET_CODE (inside) == PLUS) |
631 | { | |
8aa2a305 JW |
632 | ptrreg = REGNO (XEXP (inside, 0)); |
633 | /* ??? A r0+REG address shouldn't be possible here, because it isn't | |
634 | an offsettable address. Unfortunately, offsettable addresses use | |
635 | QImode to check the offset, and a QImode offsettable address | |
636 | requires r0 for the other operand, which is not currently | |
637 | supported, so we can't use the 'o' constraint. | |
638 | Thus we must check for and handle r0+REG addresses here. | |
639 | We punt for now, since this is likely very rare. */ | |
640 | if (GET_CODE (XEXP (inside, 1)) == REG) | |
641 | abort (); | |
bc45ade3 | 642 | } |
0d7e008e | 643 | else if (GET_CODE (inside) == LABEL_REF) |
8aa2a305 | 644 | return "mov.l %1,%0\n\tmov.l %1+4,%T0"; |
8e87e161 | 645 | else if (GET_CODE (inside) == POST_INC) |
8aa2a305 | 646 | return "mov.l %1,%0\n\tmov.l %1,%T0"; |
bc45ade3 SC |
647 | else |
648 | abort (); | |
649 | ||
8aa2a305 JW |
650 | /* Work out the safe way to copy. Copy into the second half first. */ |
651 | if (dreg == ptrreg) | |
652 | return "mov.l %T1,%T0\n\tmov.l %1,%0"; | |
bc45ade3 SC |
653 | } |
654 | ||
00f8ff66 | 655 | return "mov.l %1,%0\n\tmov.l %T1,%T0"; |
bc45ade3 SC |
656 | } |
657 | ||
8aa2a305 JW |
658 | /* Print an instruction which would have gone into a delay slot after |
659 | another instruction, but couldn't because the other instruction expanded | |
660 | into a sequence where putting the slot insn at the end wouldn't work. */ | |
0d7e008e | 661 | |
8aa2a305 JW |
662 | static void |
663 | print_slot (insn) | |
664 | rtx insn; | |
665 | { | |
666 | final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1); | |
b9654711 | 667 | |
8aa2a305 JW |
668 | INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1; |
669 | } | |
0d7e008e | 670 | |
0d7e008e SC |
671 | char * |
672 | output_far_jump (insn, op) | |
673 | rtx insn; | |
674 | rtx op; | |
675 | { | |
1245df60 R |
676 | struct { rtx lab, reg, op; } this; |
677 | char *jump; | |
678 | int far; | |
33f7f353 | 679 | int offset = branch_dest (insn) - insn_addresses[INSN_UID (insn)]; |
0d7e008e | 680 | |
1245df60 | 681 | this.lab = gen_label_rtx (); |
b9654711 | 682 | |
1a953d0f R |
683 | if (TARGET_SH2 |
684 | && offset >= -32764 | |
685 | && offset - get_attr_length (insn) <= 32766) | |
1245df60 R |
686 | { |
687 | far = 0; | |
688 | jump = "mov.w %O0,%1;braf %1"; | |
689 | } | |
690 | else | |
691 | { | |
692 | far = 1; | |
693 | jump = "mov.l %O0,%1;jmp @%1"; | |
694 | } | |
695 | /* If we have a scratch register available, use it. */ | |
696 | if (GET_CODE (PREV_INSN (insn)) == INSN | |
697 | && INSN_CODE (PREV_INSN (insn)) == CODE_FOR_indirect_jump_scratch) | |
698 | { | |
699 | this.reg = SET_DEST (PATTERN (PREV_INSN (insn))); | |
700 | output_asm_insn (jump, &this.lab); | |
701 | if (dbr_sequence_length ()) | |
702 | print_slot (final_sequence); | |
703 | else | |
704 | output_asm_insn ("nop", 0); | |
705 | } | |
706 | else | |
707 | { | |
708 | /* Output the delay slot insn first if any. */ | |
709 | if (dbr_sequence_length ()) | |
710 | print_slot (final_sequence); | |
711 | ||
c5c76735 | 712 | this.reg = gen_rtx_REG (SImode, 13); |
1245df60 R |
713 | output_asm_insn ("mov.l r13,@-r15", 0); |
714 | output_asm_insn (jump, &this.lab); | |
715 | output_asm_insn ("mov.l @r15+,r13", 0); | |
716 | } | |
717 | if (far) | |
718 | output_asm_insn (".align 2", 0); | |
719 | ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab)); | |
720 | this.op = op; | |
721 | output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab); | |
0d7e008e SC |
722 | return ""; |
723 | } | |
bc45ade3 | 724 | |
8aa2a305 JW |
725 | /* Local label counter, used for constants in the pool and inside |
726 | pattern branches. */ | |
727 | ||
728 | static int lf = 100; | |
729 | ||
730 | /* Output code for ordinary branches. */ | |
731 | ||
bc45ade3 | 732 | char * |
8aa2a305 | 733 | output_branch (logic, insn, operands) |
bc45ade3 | 734 | int logic; |
b9654711 | 735 | rtx insn; |
8aa2a305 | 736 | rtx *operands; |
bc45ade3 | 737 | { |
33f7f353 | 738 | switch (get_attr_length (insn)) |
bc45ade3 | 739 | { |
33f7f353 JR |
740 | case 6: |
741 | /* This can happen if filling the delay slot has caused a forward | |
742 | branch to exceed its range (we could reverse it, but only | |
743 | when we know we won't overextend other branches; this should | |
744 | best be handled by relaxation). | |
745 | It can also happen when other condbranches hoist delay slot insn | |
1245df60 R |
746 | from their destination, thus leading to code size increase. |
747 | But the branch will still be in the range -4092..+4098 bytes. */ | |
bc45ade3 | 748 | |
33f7f353 | 749 | if (! TARGET_RELAX) |
1245df60 | 750 | { |
33f7f353 JR |
751 | int label = lf++; |
752 | /* The call to print_slot will clobber the operands. */ | |
753 | rtx op0 = operands[0]; | |
754 | ||
755 | /* If the instruction in the delay slot is annulled (true), then | |
756 | there is no delay slot where we can put it now. The only safe | |
757 | place for it is after the label. final will do that by default. */ | |
758 | ||
759 | if (final_sequence | |
760 | && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))) | |
761 | { | |
762 | asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t", | |
763 | ASSEMBLER_DIALECT ? "/" : ".", label); | |
764 | print_slot (final_sequence); | |
765 | } | |
766 | else | |
767 | asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label); | |
768 | ||
769 | output_asm_insn ("bra\t%l0", &op0); | |
770 | fprintf (asm_out_file, "\tnop\n"); | |
771 | ASM_OUTPUT_INTERNAL_LABEL(asm_out_file, "LF", label); | |
772 | ||
773 | return ""; | |
1245df60 | 774 | } |
33f7f353 JR |
775 | /* When relaxing, handle this like a short branch. The linker |
776 | will fix it up if it still doesn't fit after relaxation. */ | |
777 | case 2: | |
778 | return logic ? "bt%.\t%l0" : "bf%.\t%l0"; | |
779 | default: | |
780 | abort (); | |
1245df60 | 781 | } |
1245df60 | 782 | } |
bc45ade3 | 783 | |
1245df60 R |
784 | char * |
785 | output_branchy_insn (code, template, insn, operands) | |
786 | char *template; | |
787 | enum rtx_code code; | |
788 | rtx insn; | |
789 | rtx *operands; | |
790 | { | |
791 | rtx next_insn = NEXT_INSN (insn); | |
792 | int label_nr; | |
79b2746a | 793 | |
1245df60 R |
794 | if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn)) |
795 | { | |
796 | rtx src = SET_SRC (PATTERN (next_insn)); | |
797 | if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code) | |
798 | { | |
799 | /* Following branch not taken */ | |
800 | operands[9] = gen_label_rtx (); | |
801 | emit_label_after (operands[9], next_insn); | |
802 | return template; | |
803 | } | |
804 | else | |
805 | { | |
33f7f353 JR |
806 | int offset = (branch_dest (next_insn) |
807 | - insn_addresses[INSN_UID (next_insn)] + 4); | |
808 | if (offset >= -252 && offset <= 258) | |
1245df60 R |
809 | { |
810 | if (GET_CODE (src) == IF_THEN_ELSE) | |
811 | /* branch_true */ | |
812 | src = XEXP (src, 1); | |
813 | operands[9] = src; | |
814 | return template; | |
815 | } | |
816 | } | |
bc45ade3 | 817 | } |
1245df60 R |
818 | operands[9] = gen_label_rtx (); |
819 | emit_label_after (operands[9], insn); | |
820 | return template; | |
821 | } | |
e4fa6b06 | 822 | |
1245df60 R |
823 | char * |
824 | output_ieee_ccmpeq (insn, operands) | |
825 | rtx insn, operands; | |
826 | { | |
827 | output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands); | |
bc45ade3 | 828 | } |
8e87e161 | 829 | \f |
8aa2a305 JW |
830 | /* Output to FILE the start of the assembler file. */ |
831 | ||
0d7e008e | 832 | void |
adade868 | 833 | output_file_start (file) |
0d7e008e | 834 | FILE *file; |
bc45ade3 | 835 | { |
0d7e008e | 836 | register int pos; |
b9654711 | 837 | |
0d7e008e | 838 | output_file_directive (file, main_input_filename); |
b9654711 | 839 | |
0d7e008e SC |
840 | /* Switch to the data section so that the coffsem symbol and the |
841 | gcc2_compiled. symbol aren't in the text section. */ | |
842 | data_section (); | |
b9654711 | 843 | |
8aa2a305 JW |
844 | if (TARGET_LITTLE_ENDIAN) |
845 | fprintf (file, "\t.little\n"); | |
bc45ade3 | 846 | } |
0d7e008e | 847 | \f |
/* Actual number of instructions used to make an arithmetic right shift
   by N; the table is indexed by N (0 .. 31).  */
static char ashiftrt_insns[] =
  {
    /*  0 ..  7 */ 0, 1, 2, 3, 4, 5, 8, 8,
    /*  8 .. 15 */ 8, 8, 8, 8, 8, 8, 8, 8,
    /* 16 .. 23 */ 2, 3, 4, 5, 8, 8, 8, 8,
    /* 24 .. 31 */ 8, 8, 8, 8, 8, 8, 8, 2
  };

/* Likewise for left shifts and logical right shifts, which are the same.  */
static char shift_insns[] =
  {
    /*  0 ..  7 */ 0, 1, 1, 2, 2, 3, 3, 4,
    /*  8 .. 15 */ 1, 2, 2, 3, 3, 4, 3, 3,
    /* 16 .. 23 */ 1, 2, 2, 3, 3, 4, 3, 3,
    /* 24 .. 31 */ 2, 3, 3, 4, 4, 4, 3, 3
  };

/* Individual shift amounts needed to get the above length sequences.
   A negative amount means a shift in the opposite direction.
   One bit right shifts clobber the T bit, so when possible, put one bit
   shifts in the middle of the sequence, so the ends are eligible for
   branch delay slots.  */
static short shift_amounts[32][5] =
  {
    /*  0 */ {0},
    /*  1 */ {1},
    /*  2 */ {2},
    /*  3 */ {2, 1},
    /*  4 */ {2, 2},
    /*  5 */ {2, 1, 2},
    /*  6 */ {2, 2, 2},
    /*  7 */ {2, 2, 1, 2},
    /*  8 */ {8},
    /*  9 */ {8, 1},
    /* 10 */ {8, 2},
    /* 11 */ {8, 1, 2},
    /* 12 */ {8, 2, 2},
    /* 13 */ {8, 2, 1, 2},
    /* 14 */ {8, -2, 8},
    /* 15 */ {8, -1, 8},
    /* 16 */ {16},
    /* 17 */ {16, 1},
    /* 18 */ {16, 2},
    /* 19 */ {16, 1, 2},
    /* 20 */ {16, 2, 2},
    /* 21 */ {16, 2, 1, 2},
    /* 22 */ {16, -2, 8},
    /* 23 */ {16, -1, 8},
    /* 24 */ {16, 8},
    /* 25 */ {16, 1, 8},
    /* 26 */ {16, 8, 2},
    /* 27 */ {16, 8, 1, 2},
    /* 28 */ {16, 8, 2, 2},
    /* 29 */ {16, -1, -2, 16},
    /* 30 */ {16, -2, 16},
    /* 31 */ {16, -1, 16}
  };

/* Likewise, but for shift amounts < 16, up to three highmost bits
   might be clobbered.  This is typically used when combined with some
   kind of sign or zero extension.  */

static char ext_shift_insns[] =
  {
    /*  0 ..  7 */ 0, 1, 1, 2, 2, 3, 2, 2,
    /*  8 .. 15 */ 1, 2, 2, 3, 3, 3, 2, 2,
    /* 16 .. 23 */ 1, 2, 2, 3, 3, 4, 3, 3,
    /* 24 .. 31 */ 2, 3, 3, 4, 4, 4, 3, 3
  };

static short ext_shift_amounts[32][4] =
  {
    /*  0 */ {0},
    /*  1 */ {1},
    /*  2 */ {2},
    /*  3 */ {2, 1},
    /*  4 */ {2, 2},
    /*  5 */ {2, 1, 2},
    /*  6 */ {8, -2},
    /*  7 */ {8, -1},
    /*  8 */ {8},
    /*  9 */ {8, 1},
    /* 10 */ {8, 2},
    /* 11 */ {8, 1, 2},
    /* 12 */ {8, 2, 2},
    /* 13 */ {16, -2, -1},
    /* 14 */ {16, -2},
    /* 15 */ {16, -1},
    /* 16 */ {16},
    /* 17 */ {16, 1},
    /* 18 */ {16, 2},
    /* 19 */ {16, 1, 2},
    /* 20 */ {16, 2, 2},
    /* 21 */ {16, 2, 1, 2},
    /* 22 */ {16, -2, 8},
    /* 23 */ {16, -1, 8},
    /* 24 */ {16, 8},
    /* 25 */ {16, 1, 8},
    /* 26 */ {16, 8, 2},
    /* 27 */ {16, 8, 1, 2},
    /* 28 */ {16, 8, 2, 2},
    /* 29 */ {16, -1, -2, 16},
    /* 30 */ {16, -2, 16},
    /* 31 */ {16, -1, 16}
  };

/* Assuming we have a value that has been sign-extended by at least one bit,
   can we use the ext_shift_amounts with the last shift turned to an
   arithmetic shift to shift it by N without data loss, and quicker than by
   other means?  True exactly for N == 7 and N == 15.  */
#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
891 | ||
16bea517 JW |
892 | /* This is used in length attributes in sh.md to help compute the length |
893 | of arbitrary constant shift instructions. */ | |
bc45ade3 | 894 | |
16bea517 JW |
895 | int |
896 | shift_insns_rtx (insn) | |
897 | rtx insn; | |
898 | { | |
899 | rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); | |
900 | int shift_count = INTVAL (XEXP (set_src, 1)); | |
901 | enum rtx_code shift_code = GET_CODE (set_src); | |
00f8ff66 | 902 | |
16bea517 JW |
903 | switch (shift_code) |
904 | { | |
905 | case ASHIFTRT: | |
906 | return ashiftrt_insns[shift_count]; | |
907 | case LSHIFTRT: | |
908 | case ASHIFT: | |
909 | return shift_insns[shift_count]; | |
910 | default: | |
911 | abort(); | |
912 | } | |
913 | } | |
914 | ||
16bea517 | 915 | /* Return the cost of a shift. */ |
bc45ade3 | 916 | |
0d7e008e | 917 | int |
8aa2a305 JW |
918 | shiftcosts (x) |
919 | rtx x; | |
bc45ade3 | 920 | { |
8aa2a305 JW |
921 | int value = INTVAL (XEXP (x, 1)); |
922 | ||
16bea517 | 923 | /* If shift by a non constant, then this will be expensive. */ |
8aa2a305 | 924 | if (GET_CODE (XEXP (x, 1)) != CONST_INT) |
1245df60 | 925 | return SH_DYNAMIC_SHIFT_COST; |
bc45ade3 | 926 | |
16bea517 | 927 | /* Otherwise, return the true cost in instructions. */ |
8aa2a305 | 928 | if (GET_CODE (x) == ASHIFTRT) |
49b6d06b JW |
929 | { |
930 | int cost = ashiftrt_insns[value]; | |
931 | /* If SH3, then we put the constant in a reg and use shad. */ | |
1245df60 R |
932 | if (cost > 1 + SH_DYNAMIC_SHIFT_COST) |
933 | cost = 1 + SH_DYNAMIC_SHIFT_COST; | |
49b6d06b JW |
934 | return cost; |
935 | } | |
8aa2a305 JW |
936 | else |
937 | return shift_insns[value]; | |
0d7e008e | 938 | } |
b9654711 | 939 | |
8aa2a305 JW |
940 | /* Return the cost of an AND operation. */ |
941 | ||
8e87e161 | 942 | int |
8aa2a305 JW |
943 | andcosts (x) |
944 | rtx x; | |
0d7e008e SC |
945 | { |
946 | int i; | |
8aa2a305 | 947 | |
a6f71af5 | 948 | /* Anding with a register is a single cycle and instruction. */ |
8aa2a305 | 949 | if (GET_CODE (XEXP (x, 1)) != CONST_INT) |
a6f71af5 | 950 | return 1; |
8aa2a305 JW |
951 | |
952 | i = INTVAL (XEXP (x, 1)); | |
a6f71af5 | 953 | /* These constants are single cycle extu.[bw] instructions. */ |
0d7e008e | 954 | if (i == 0xff || i == 0xffff) |
a6f71af5 JW |
955 | return 1; |
956 | /* Constants that can be used in an and immediate instruction is a single | |
957 | cycle, but this requires r0, so make it a little more expensive. */ | |
958 | if (CONST_OK_FOR_L (i)) | |
0d7e008e | 959 | return 2; |
a6f71af5 JW |
960 | /* Constants that can be loaded with a mov immediate and an and. |
961 | This case is probably unnecessary. */ | |
0d7e008e | 962 | if (CONST_OK_FOR_I (i)) |
a6f71af5 JW |
963 | return 2; |
964 | /* Any other constants requires a 2 cycle pc-relative load plus an and. | |
965 | This case is probably unnecessary. */ | |
966 | return 3; | |
0d7e008e | 967 | } |
d3ae8277 | 968 | |
16bea517 | 969 | /* Return the cost of a multiply. */ |
0d7e008e | 970 | int |
8aa2a305 JW |
971 | multcosts (x) |
972 | rtx x; | |
0d7e008e SC |
973 | { |
974 | if (TARGET_SH2) | |
d3ae8277 SC |
975 | { |
976 | /* We have a mul insn, so we can never take more than the mul and the | |
a7771f78 | 977 | read of the mac reg, but count more because of the latency and extra |
16bea517 | 978 | reg usage. */ |
d3ae8277 | 979 | if (TARGET_SMALLCODE) |
8e87e161 | 980 | return 2; |
a7771f78 | 981 | return 3; |
d3ae8277 SC |
982 | } |
983 | ||
a7771f78 | 984 | /* If we're aiming at small code, then just count the number of |
16bea517 | 985 | insns in a multiply call sequence. */ |
8e87e161 | 986 | if (TARGET_SMALLCODE) |
8aa2a305 | 987 | return 5; |
d3ae8277 | 988 | |
16bea517 | 989 | /* Otherwise count all the insns in the routine we'd be calling too. */ |
d3ae8277 | 990 | return 20; |
0d7e008e | 991 | } |
b9654711 | 992 | |
16bea517 | 993 | /* Code to expand a shift. */ |
b9654711 | 994 | |
0d7e008e SC |
995 | void |
996 | gen_ashift (type, n, reg) | |
997 | int type; | |
998 | int n; | |
999 | rtx reg; | |
1000 | { | |
16bea517 JW |
1001 | /* Negative values here come from the shift_amounts array. */ |
1002 | if (n < 0) | |
1003 | { | |
1004 | if (type == ASHIFT) | |
1005 | type = LSHIFTRT; | |
1006 | else | |
1007 | type = ASHIFT; | |
1008 | n = -n; | |
1009 | } | |
1010 | ||
0d7e008e | 1011 | switch (type) |
bc45ade3 | 1012 | { |
0d7e008e SC |
1013 | case ASHIFTRT: |
1014 | emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n))); | |
1015 | break; | |
1016 | case LSHIFTRT: | |
16bea517 JW |
1017 | if (n == 1) |
1018 | emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n))); | |
1019 | else | |
1020 | emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n))); | |
0d7e008e SC |
1021 | break; |
1022 | case ASHIFT: | |
5325c0fa | 1023 | emit_insn (gen_ashlsi3_k (reg, reg, GEN_INT (n))); |
0d7e008e | 1024 | break; |
bc45ade3 | 1025 | } |
bc45ade3 | 1026 | } |
bc45ade3 | 1027 | |
8d481241 JW |
1028 | /* Same for HImode */ |
1029 | ||
1030 | void | |
1031 | gen_ashift_hi (type, n, reg) | |
1032 | int type; | |
1033 | int n; | |
1034 | rtx reg; | |
1035 | { | |
1036 | /* Negative values here come from the shift_amounts array. */ | |
1037 | if (n < 0) | |
1038 | { | |
1039 | if (type == ASHIFT) | |
1040 | type = LSHIFTRT; | |
1041 | else | |
1042 | type = ASHIFT; | |
1043 | n = -n; | |
1044 | } | |
1045 | ||
1046 | switch (type) | |
1047 | { | |
1048 | case ASHIFTRT: | |
8d481241 | 1049 | case LSHIFTRT: |
d0c42859 R |
1050 | /* We don't have HImode right shift operations because using the |
1051 | ordinary 32 bit shift instructions for that doesn't generate proper | |
1052 | zero/sign extension. | |
1053 | gen_ashift_hi is only called in contexts where we know that the | |
1054 | sign extension works out correctly. */ | |
97d6fd65 R |
1055 | { |
1056 | int word = 0; | |
1057 | if (GET_CODE (reg) == SUBREG) | |
1058 | { | |
1059 | word = SUBREG_WORD (reg); | |
1060 | reg = SUBREG_REG (reg); | |
1061 | } | |
1062 | gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, word)); | |
1063 | break; | |
1064 | } | |
8d481241 JW |
1065 | case ASHIFT: |
1066 | emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n))); | |
1067 | break; | |
1068 | } | |
1069 | } | |
1070 | ||
8aa2a305 JW |
1071 | /* Output RTL to split a constant shift into its component SH constant |
1072 | shift instructions. */ | |
1073 | ||
0d7e008e SC |
1074 | int |
1075 | gen_shifty_op (code, operands) | |
1076 | int code; | |
1077 | rtx *operands; | |
bc45ade3 | 1078 | { |
16bea517 | 1079 | int value = INTVAL (operands[2]); |
8aa2a305 | 1080 | int max, i; |
00f8ff66 | 1081 | |
cff3d762 JW |
1082 | /* Truncate the shift count in case it is out of bounds. */ |
1083 | value = value & 0x1f; | |
1084 | ||
8aa2a305 | 1085 | if (value == 31) |
16bea517 | 1086 | { |
8aa2a305 | 1087 | if (code == LSHIFTRT) |
0d7e008e | 1088 | { |
8aa2a305 JW |
1089 | emit_insn (gen_rotlsi3_1 (operands[0], operands[0])); |
1090 | emit_insn (gen_movt (operands[0])); | |
1091 | return; | |
16bea517 | 1092 | } |
8aa2a305 | 1093 | else if (code == ASHIFT) |
16bea517 | 1094 | { |
8aa2a305 JW |
1095 | /* There is a two instruction sequence for 31 bit left shifts, |
1096 | but it requires r0. */ | |
1097 | if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0) | |
0d7e008e | 1098 | { |
8aa2a305 JW |
1099 | emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx)); |
1100 | emit_insn (gen_rotlsi3_31 (operands[0], operands[0])); | |
1101 | return; | |
0d7e008e | 1102 | } |
16bea517 | 1103 | } |
8aa2a305 | 1104 | } |
c17f53a8 JW |
1105 | else if (value == 0) |
1106 | { | |
1107 | /* This can happen when not optimizing. We must output something here | |
1108 | to prevent the compiler from aborting in final.c after the try_split | |
1109 | call. */ | |
1110 | emit_insn (gen_nop ()); | |
1111 | return; | |
1112 | } | |
a9f71ad8 | 1113 | |
8aa2a305 JW |
1114 | max = shift_insns[value]; |
1115 | for (i = 0; i < max; i++) | |
1116 | gen_ashift (code, shift_amounts[value][i], operands[0]); | |
1117 | } | |
8d481241 JW |
1118 | |
1119 | /* Same as above, but optimized for values where the topmost bits don't | |
1120 | matter. */ | |
1121 | ||
1122 | int | |
1123 | gen_shifty_hi_op (code, operands) | |
1124 | int code; | |
1125 | rtx *operands; | |
1126 | { | |
1127 | int value = INTVAL (operands[2]); | |
1128 | int max, i; | |
1129 | void (*gen_fun)(); | |
1130 | ||
1131 | /* This operation is used by and_shl for SImode values with a few | |
1132 | high bits known to be cleared. */ | |
1133 | value &= 31; | |
1134 | if (value == 0) | |
1135 | { | |
1136 | emit_insn (gen_nop ()); | |
1137 | return; | |
1138 | } | |
1139 | ||
1140 | gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift; | |
1141 | if (code == ASHIFT) | |
1142 | { | |
1143 | max = ext_shift_insns[value]; | |
1144 | for (i = 0; i < max; i++) | |
1145 | gen_fun (code, ext_shift_amounts[value][i], operands[0]); | |
1146 | } | |
1147 | else | |
1148 | /* When shifting right, emit the shifts in reverse order, so that | |
1149 | solitary negative values come first. */ | |
1150 | for (i = ext_shift_insns[value] - 1; i >= 0; i--) | |
1151 | gen_fun (code, ext_shift_amounts[value][i], operands[0]); | |
1152 | } | |
8aa2a305 JW |
1153 | |
1154 | /* Output RTL for an arithmetic right shift. */ | |
1155 | ||
1156 | /* ??? Rewrite to use super-optimizer sequences. */ | |
1157 | ||
1158 | int | |
1159 | expand_ashiftrt (operands) | |
1160 | rtx *operands; | |
1161 | { | |
1162 | rtx wrk; | |
1163 | char func[18]; | |
1164 | tree func_name; | |
1165 | int value; | |
1166 | ||
49b6d06b | 1167 | if (TARGET_SH3) |
20b04867 | 1168 | { |
49b6d06b JW |
1169 | if (GET_CODE (operands[2]) != CONST_INT) |
1170 | { | |
1171 | rtx count = copy_to_mode_reg (SImode, operands[2]); | |
1172 | emit_insn (gen_negsi2 (count, count)); | |
1173 | emit_insn (gen_ashrsi3_d (operands[0], operands[1], count)); | |
1174 | return 1; | |
1175 | } | |
1245df60 R |
1176 | else if (ashiftrt_insns[INTVAL (operands[2]) & 31] |
1177 | > 1 + SH_DYNAMIC_SHIFT_COST) | |
49b6d06b | 1178 | { |
1245df60 R |
1179 | rtx count |
1180 | = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31))); | |
49b6d06b JW |
1181 | emit_insn (gen_ashrsi3_d (operands[0], operands[1], count)); |
1182 | return 1; | |
1183 | } | |
20b04867 | 1184 | } |
8aa2a305 JW |
1185 | if (GET_CODE (operands[2]) != CONST_INT) |
1186 | return 0; | |
1187 | ||
1245df60 | 1188 | value = INTVAL (operands[2]) & 31; |
8aa2a305 JW |
1189 | |
1190 | if (value == 31) | |
1191 | { | |
1192 | emit_insn (gen_ashrsi2_31 (operands[0], operands[1])); | |
1193 | return 1; | |
1194 | } | |
1195 | else if (value >= 16 && value <= 19) | |
1196 | { | |
1197 | wrk = gen_reg_rtx (SImode); | |
1198 | emit_insn (gen_ashrsi2_16 (wrk, operands[1])); | |
1199 | value -= 16; | |
1200 | while (value--) | |
1201 | gen_ashift (ASHIFTRT, 1, wrk); | |
1202 | emit_move_insn (operands[0], wrk); | |
1203 | return 1; | |
a9f71ad8 | 1204 | } |
8aa2a305 JW |
1205 | /* Expand a short sequence inline, longer call a magic routine. */ |
1206 | else if (value <= 5) | |
1207 | { | |
1208 | wrk = gen_reg_rtx (SImode); | |
1209 | emit_move_insn (wrk, operands[1]); | |
1210 | while (value--) | |
1211 | gen_ashift (ASHIFTRT, 1, wrk); | |
1212 | emit_move_insn (operands[0], wrk); | |
1213 | return 1; | |
1214 | } | |
1215 | ||
1216 | wrk = gen_reg_rtx (Pmode); | |
1217 | ||
1218 | /* Load the value into an arg reg and call a helper. */ | |
c5c76735 | 1219 | emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]); |
8aa2a305 JW |
1220 | sprintf (func, "__ashiftrt_r4_%d", value); |
1221 | func_name = get_identifier (func); | |
c5c76735 JL |
1222 | emit_move_insn (wrk, gen_rtx_SYMBOL_REF (Pmode, |
1223 | IDENTIFIER_POINTER (func_name))); | |
8aa2a305 | 1224 | emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk)); |
c5c76735 | 1225 | emit_move_insn (operands[0], gen_rtx_REG (SImode, 4)); |
8aa2a305 | 1226 | return 1; |
bc45ade3 | 1227 | } |
8d481241 | 1228 | |
1245df60 R |
1229 | int sh_dynamicalize_shift_p (count) |
1230 | rtx count; | |
1231 | { | |
1232 | return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST; | |
1233 | } | |
1234 | ||
8d481241 JW |
1235 | /* Try to find a good way to implement the combiner pattern |
1236 | [(set (match_operand:SI 0 "register_operand" "r") | |
1237 | (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") | |
1238 | (match_operand:SI 2 "const_int_operand" "n")) | |
1239 | (match_operand:SI 3 "const_int_operand" "n"))) . | |
1240 | LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3. | |
1241 | return 0 for simple right / left or left/right shift combination. | |
1242 | return 1 for a combination of shifts with zero_extend. | |
1243 | return 2 for a combination of shifts with an AND that needs r0. | |
1244 | return 3 for a combination of shifts with an AND that needs an extra | |
1245 | scratch register, when the three highmost bits of the AND mask are clear. | |
1246 | return 4 for a combination of shifts with an AND that needs an extra | |
1247 | scratch register, when any of the three highmost bits of the AND mask | |
1248 | is set. | |
1249 | If ATTRP is set, store an initial right shift width in ATTRP[0], | |
1250 | and the instruction length in ATTRP[1] . These values are not valid | |
1251 | when returning 0. | |
1252 | When ATTRP is set and returning 1, ATTRP[2] gets set to the index into | |
1253 | shift_amounts for the last shift value that is to be used before the | |
1254 | sign extend. */ | |
1255 | int | |
1256 | shl_and_kind (left_rtx, mask_rtx, attrp) | |
1257 | rtx left_rtx, mask_rtx; | |
1258 | int *attrp; | |
1259 | { | |
1260 | unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2; | |
1261 | int left = INTVAL (left_rtx), right; | |
1262 | int best = 0; | |
1263 | int cost, best_cost = 10000; | |
1264 | int best_right = 0, best_len = 0; | |
1265 | int i; | |
1266 | int can_ext; | |
1267 | ||
1268 | if (left < 0 || left > 31) | |
1269 | return 0; | |
1270 | if (GET_CODE (mask_rtx) == CONST_INT) | |
1271 | mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left; | |
1272 | else | |
1273 | mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left; | |
1274 | /* Can this be expressed as a right shift / left shift pair ? */ | |
1275 | lsb = ((mask ^ (mask - 1)) >> 1) + 1; | |
1276 | right = exact_log2 (lsb); | |
1277 | mask2 = ~(mask + lsb - 1); | |
1278 | lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1; | |
1279 | /* mask has no zeroes but trailing zeroes <==> ! mask2 */ | |
1280 | if (! mask2) | |
1281 | best_cost = shift_insns[right] + shift_insns[right + left]; | |
1282 | /* mask has no trailing zeroes <==> ! right */ | |
1283 | else if (! right && mask2 == ~(lsb2 - 1)) | |
1284 | { | |
1285 | int late_right = exact_log2 (lsb2); | |
1286 | best_cost = shift_insns[left + late_right] + shift_insns[late_right]; | |
1287 | } | |
1288 | /* Try to use zero extend */ | |
1289 | if (mask2 == ~(lsb2 - 1)) | |
1290 | { | |
1291 | int width, first; | |
1292 | ||
1293 | for (width = 8; width <= 16; width += 8) | |
1294 | { | |
1295 | /* Can we zero-extend right away? */ | |
1296 | if (lsb2 == (HOST_WIDE_INT)1 << width) | |
1297 | { | |
1298 | cost | |
1299 | = 1 + ext_shift_insns[right] + ext_shift_insns[left + right]; | |
1300 | if (cost < best_cost) | |
1301 | { | |
1302 | best = 1; | |
1303 | best_cost = cost; | |
1304 | best_right = right; | |
1305 | best_len = cost; | |
1306 | if (attrp) | |
1307 | attrp[2] = -1; | |
1308 | } | |
1309 | continue; | |
1310 | } | |
1311 | /* ??? Could try to put zero extend into initial right shift, | |
1312 | or even shift a bit left before the right shift. */ | |
1313 | /* Determine value of first part of left shift, to get to the | |
1314 | zero extend cut-off point. */ | |
1315 | first = width - exact_log2 (lsb2) + right; | |
1316 | if (first >= 0 && right + left - first >= 0) | |
1317 | { | |
1318 | cost = ext_shift_insns[right] + ext_shift_insns[first] + 1 | |
1319 | + ext_shift_insns[right + left - first]; | |
1320 | if (cost < best_cost) | |
1321 | { | |
1322 | best = 1; | |
1323 | best_cost = cost; | |
1324 | best_right = right; | |
1325 | best_len = cost; | |
1326 | if (attrp) | |
1327 | attrp[2] = first; | |
1328 | } | |
1329 | } | |
1330 | } | |
1331 | } | |
1332 | /* Try to use r0 AND pattern */ | |
1333 | for (i = 0; i <= 2; i++) | |
1334 | { | |
1335 | if (i > right) | |
1336 | break; | |
1337 | if (! CONST_OK_FOR_L (mask >> i)) | |
1338 | continue; | |
1339 | cost = (i != 0) + 2 + ext_shift_insns[left + i]; | |
1340 | if (cost < best_cost) | |
1341 | { | |
1342 | best = 2; | |
1343 | best_cost = cost; | |
1344 | best_right = i; | |
1345 | best_len = cost - 1; | |
1346 | } | |
1347 | } | |
1348 | /* Try to use a scratch register to hold the AND operand. */ | |
1349 | can_ext = ((mask << left) & 0xe0000000) == 0; | |
1350 | for (i = 0; i <= 2; i++) | |
1351 | { | |
1352 | if (i > right) | |
1353 | break; | |
1354 | cost = (i != 0) + (CONST_OK_FOR_I (mask >> i) ? 2 : 3) | |
6ab911bb | 1355 | + (can_ext ? ext_shift_insns : shift_insns)[left + i]; |
8d481241 JW |
1356 | if (cost < best_cost) |
1357 | { | |
1358 | best = 4 - can_ext; | |
1359 | best_cost = cost; | |
1360 | best_right = i; | |
1361 | best_len = cost - 1 - ! CONST_OK_FOR_I (mask >> i); | |
1362 | } | |
1363 | } | |
1364 | ||
1365 | if (attrp) | |
1366 | { | |
1367 | attrp[0] = best_right; | |
1368 | attrp[1] = best_len; | |
1369 | } | |
1370 | return best; | |
1371 | } | |
1372 | ||
1373 | /* This is used in length attributes of the unnamed instructions | |
1374 | corresponding to shl_and_kind return values of 1 and 2. */ | |
1375 | int | |
1376 | shl_and_length (insn) | |
1377 | rtx insn; | |
1378 | { | |
1379 | rtx set_src, left_rtx, mask_rtx; | |
1380 | int attributes[3]; | |
1381 | ||
1382 | set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); | |
1383 | left_rtx = XEXP (XEXP (set_src, 0), 1); | |
1384 | mask_rtx = XEXP (set_src, 1); | |
1385 | shl_and_kind (left_rtx, mask_rtx, attributes); | |
1386 | return attributes[1]; | |
1387 | } | |
1388 | ||
1389 | /* This is used in length attribute of the and_shl_scratch instruction. */ | |
1390 | ||
1391 | int | |
1392 | shl_and_scr_length (insn) | |
1393 | rtx insn; | |
1394 | { | |
1395 | rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); | |
1396 | int len = shift_insns[INTVAL (XEXP (set_src, 1))]; | |
1397 | rtx op = XEXP (set_src, 0); | |
1398 | len += shift_insns[INTVAL (XEXP (op, 1))] + 1; | |
1399 | op = XEXP (XEXP (op, 0), 0); | |
1400 | return len + shift_insns[INTVAL (XEXP (op, 1))]; | |
1401 | } | |
1402 | ||
1403 | /* Generating rtl? */ | |
1404 | extern int rtx_equal_function_value_matters; | |
1405 | ||
1406 | /* Generate rtl for instructions for which shl_and_kind advised a particular | |
1407 | method of generating them, i.e. returned zero. */ | |
1408 | ||
1409 | int | |
1410 | gen_shl_and (dest, left_rtx, mask_rtx, source) | |
1411 | rtx dest, left_rtx, mask_rtx, source; | |
1412 | { | |
1413 | int attributes[3]; | |
1414 | unsigned HOST_WIDE_INT mask; | |
1415 | int kind = shl_and_kind (left_rtx, mask_rtx, attributes); | |
1416 | int right, total_shift; | |
1417 | int (*shift_gen_fun) PROTO((int, rtx*)) = gen_shifty_hi_op; | |
1418 | ||
1419 | right = attributes[0]; | |
1420 | total_shift = INTVAL (left_rtx) + right; | |
1421 | mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift; | |
1422 | switch (kind) | |
1423 | { | |
1424 | default: | |
1425 | return -1; | |
1426 | case 1: | |
1427 | { | |
1428 | int first = attributes[2]; | |
1429 | rtx operands[3]; | |
1430 | ||
1431 | if (first < 0) | |
1432 | { | |
7174c937 | 1433 | emit_insn ((mask << right) <= 0xff |
afad3d2c JW |
1434 | ? gen_zero_extendqisi2(dest, |
1435 | gen_lowpart (QImode, source)) | |
1436 | : gen_zero_extendhisi2(dest, | |
1437 | gen_lowpart (HImode, source))); | |
8d481241 JW |
1438 | source = dest; |
1439 | } | |
1440 | if (source != dest) | |
1441 | emit_insn (gen_movsi (dest, source)); | |
1442 | operands[0] = dest; | |
1443 | if (right) | |
1444 | { | |
1445 | operands[2] = GEN_INT (right); | |
1446 | gen_shifty_hi_op (LSHIFTRT, operands); | |
1447 | } | |
1448 | if (first > 0) | |
1449 | { | |
1450 | operands[2] = GEN_INT (first); | |
1451 | gen_shifty_hi_op (ASHIFT, operands); | |
1452 | total_shift -= first; | |
1453 | mask <<= first; | |
1454 | } | |
1455 | if (first >= 0) | |
7174c937 | 1456 | emit_insn (mask <= 0xff |
afad3d2c JW |
1457 | ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest)) |
1458 | : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest))); | |
8d481241 JW |
1459 | if (total_shift > 0) |
1460 | { | |
1461 | operands[2] = GEN_INT (total_shift); | |
1462 | gen_shifty_hi_op (ASHIFT, operands); | |
1463 | } | |
1464 | break; | |
1465 | } | |
1466 | case 4: | |
1467 | shift_gen_fun = gen_shifty_op; | |
8d481241 | 1468 | case 3: |
24c50999 JR |
1469 | /* If the topmost bit that matters is set, set the topmost bits |
1470 | that don't matter. This way, we might be able to get a shorter | |
1471 | signed constant. */ | |
1472 | if (mask & ((HOST_WIDE_INT)1 << 31 - total_shift)) | |
1473 | mask |= (HOST_WIDE_INT)~0 << (31 - total_shift); | |
61fb6bac | 1474 | case 2: |
8d481241 JW |
1475 | /* Don't expand fine-grained when combining, because that will |
1476 | make the pattern fail. */ | |
1477 | if (rtx_equal_function_value_matters | |
1478 | || reload_in_progress || reload_completed) | |
1479 | { | |
1480 | rtx operands[3]; | |
1481 | ||
61fb6bac R |
1482 | /* Cases 3 and 4 should be handled by this split |
1483 | only while combining */ | |
1484 | if (kind > 2) | |
1485 | abort (); | |
8d481241 JW |
1486 | if (right) |
1487 | { | |
1488 | emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right))); | |
1489 | source = dest; | |
1490 | } | |
1491 | emit_insn (gen_andsi3 (dest, source, GEN_INT (mask))); | |
afad3d2c JW |
1492 | if (total_shift) |
1493 | { | |
1494 | operands[0] = dest; | |
1495 | operands[1] = dest; | |
1496 | operands[2] = GEN_INT (total_shift); | |
1497 | shift_gen_fun (ASHIFT, operands); | |
1498 | } | |
8d481241 JW |
1499 | break; |
1500 | } | |
1501 | else | |
1502 | { | |
1503 | int neg = 0; | |
1504 | if (kind != 4 && total_shift < 16) | |
1505 | { | |
1506 | neg = -ext_shift_amounts[total_shift][1]; | |
1507 | if (neg > 0) | |
1508 | neg -= ext_shift_amounts[total_shift][2]; | |
1509 | else | |
1510 | neg = 0; | |
1511 | } | |
1512 | emit_insn (gen_and_shl_scratch (dest, source, | |
1513 | GEN_INT (right), | |
1514 | GEN_INT (mask), | |
1515 | GEN_INT (total_shift + neg), | |
1516 | GEN_INT (neg))); | |
1517 | emit_insn (gen_movsi (dest, dest)); | |
1518 | break; | |
1519 | } | |
1520 | } | |
1521 | return 0; | |
1522 | } | |
1523 | ||
1524 | /* Try to find a good way to implement the combiner pattern | |
1525 | [(set (match_operand:SI 0 "register_operand" "=r") | |
1526 | (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") | |
1527 | (match_operand:SI 2 "const_int_operand" "n") | |
1528 | (match_operand:SI 3 "const_int_operand" "n") | |
1529 | (const_int 0))) | |
1530 | (clobber (reg:SI 18))] | |
1531 | LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3. | |
1532 | return 0 for simple left / right shift combination. | |
1533 | return 1 for left shift / 8 bit sign extend / left shift. | |
1534 | return 2 for left shift / 16 bit sign extend / left shift. | |
1535 | return 3 for left shift / 8 bit sign extend / shift / sign extend. | |
1536 | return 4 for left shift / 16 bit sign extend / shift / sign extend. | |
1537 | return 5 for left shift / 16 bit sign extend / right shift | |
1538 | return 6 for < 8 bit sign extend / left shift. | |
1539 | return 7 for < 8 bit sign extend / left shift / single right shift. | |
1540 | If COSTP is nonzero, assign the calculated cost to *COSTP. */ | |
1541 | ||
1542 | int | |
1543 | shl_sext_kind (left_rtx, size_rtx, costp) | |
1544 | rtx left_rtx, size_rtx; | |
1545 | int *costp; | |
1546 | { | |
1547 | int left, size, insize, ext; | |
1548 | int cost, best_cost; | |
1549 | int kind; | |
1550 | ||
1551 | left = INTVAL (left_rtx); | |
1552 | size = INTVAL (size_rtx); | |
1553 | insize = size - left; | |
1554 | if (insize <= 0) | |
1555 | abort (); | |
1556 | /* Default to left / right shift. */ | |
1557 | kind = 0; | |
1558 | best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size]; | |
1559 | if (size <= 16) | |
1560 | { | |
1561 | /* 16 bit shift / sign extend / 16 bit shift */ | |
1562 | cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size]; | |
afad3d2c JW |
1563 | /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden |
1564 | below, by alternative 3 or something even better. */ | |
8d481241 JW |
1565 | if (cost < best_cost) |
1566 | { | |
1567 | kind = 5; | |
1568 | best_cost = cost; | |
1569 | } | |
1570 | } | |
1571 | /* Try a plain sign extend between two shifts. */ | |
1572 | for (ext = 16; ext >= insize; ext -= 8) | |
1573 | { | |
1574 | if (ext <= size) | |
1575 | { | |
1576 | cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext]; | |
1577 | if (cost < best_cost) | |
1578 | { | |
1579 | kind = ext / 8U; | |
1580 | best_cost = cost; | |
1581 | } | |
1582 | } | |
ae9d19c0 JR |
1583 | /* Check if we can do a sloppy shift with a final signed shift |
1584 | restoring the sign. */ | |
1585 | if (EXT_SHIFT_SIGNED (size - ext)) | |
1586 | cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1; | |
1587 | /* If not, maybe it's still cheaper to do the second shift sloppy, | |
1588 | and do a final sign extend? */ | |
1589 | else if (size <= 16) | |
1590 | cost = ext_shift_insns[ext - insize] + 1 | |
1591 | + ext_shift_insns[size > ext ? size - ext : ext - size] + 1; | |
1592 | else | |
1593 | continue; | |
1594 | if (cost < best_cost) | |
8d481241 | 1595 | { |
ae9d19c0 JR |
1596 | kind = ext / 8U + 2; |
1597 | best_cost = cost; | |
8d481241 JW |
1598 | } |
1599 | } | |
1600 | /* Check if we can sign extend in r0 */ | |
1601 | if (insize < 8) | |
1602 | { | |
1603 | cost = 3 + shift_insns[left]; | |
1604 | if (cost < best_cost) | |
1605 | { | |
1606 | kind = 6; | |
1607 | best_cost = cost; | |
1608 | } | |
1609 | /* Try the same with a final signed shift. */ | |
1610 | if (left < 31) | |
1611 | { | |
1612 | cost = 3 + ext_shift_insns[left + 1] + 1; | |
1613 | if (cost < best_cost) | |
1614 | { | |
1615 | kind = 7; | |
1616 | best_cost = cost; | |
1617 | } | |
1618 | } | |
1619 | } | |
1620 | if (TARGET_SH3) | |
1621 | { | |
1622 | /* Try to use a dynamic shift. */ | |
1245df60 | 1623 | cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST; |
8d481241 JW |
1624 | if (cost < best_cost) |
1625 | { | |
1626 | kind = 0; | |
1627 | best_cost = cost; | |
1628 | } | |
1629 | } | |
1630 | if (costp) | |
1631 | *costp = cost; | |
1632 | return kind; | |
1633 | } | |
1634 | ||
1635 | /* Function to be used in the length attribute of the instructions | |
1636 | implementing this pattern. */ | |
1637 | ||
1638 | int | |
1639 | shl_sext_length (insn) | |
1640 | rtx insn; | |
1641 | { | |
1642 | rtx set_src, left_rtx, size_rtx; | |
1643 | int cost; | |
1644 | ||
1645 | set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); | |
1646 | left_rtx = XEXP (XEXP (set_src, 0), 1); | |
1647 | size_rtx = XEXP (set_src, 1); | |
1648 | shl_sext_kind (left_rtx, size_rtx, &cost); | |
1649 | return cost; | |
1650 | } | |
1651 | ||
1652 | /* Generate rtl for this pattern */ | |
1653 | ||
1654 | int | |
1655 | gen_shl_sext (dest, left_rtx, size_rtx, source) | |
1656 | rtx dest, left_rtx, size_rtx, source; | |
1657 | { | |
1658 | int kind; | |
d00d338c | 1659 | int left, size, insize, cost; |
8d481241 JW |
1660 | rtx operands[3]; |
1661 | ||
d00d338c | 1662 | kind = shl_sext_kind (left_rtx, size_rtx, &cost); |
8d481241 JW |
1663 | left = INTVAL (left_rtx); |
1664 | size = INTVAL (size_rtx); | |
1665 | insize = size - left; | |
1666 | switch (kind) | |
1667 | { | |
1668 | case 1: | |
1669 | case 2: | |
1670 | case 3: | |
1671 | case 4: | |
1672 | { | |
1673 | int ext = kind & 1 ? 8 : 16; | |
1674 | int shift2 = size - ext; | |
1675 | ||
1676 | /* Don't expand fine-grained when combining, because that will | |
1677 | make the pattern fail. */ | |
1678 | if (! rtx_equal_function_value_matters | |
1679 | && ! reload_in_progress && ! reload_completed) | |
1680 | { | |
1681 | emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); | |
1682 | emit_insn (gen_movsi (dest, source)); | |
1683 | break; | |
1684 | } | |
1685 | if (dest != source) | |
1686 | emit_insn (gen_movsi (dest, source)); | |
1687 | operands[0] = dest; | |
afad3d2c JW |
1688 | if (ext - insize) |
1689 | { | |
1690 | operands[2] = GEN_INT (ext - insize); | |
1691 | gen_shifty_hi_op (ASHIFT, operands); | |
1692 | } | |
8d481241 | 1693 | emit_insn (kind & 1 |
afad3d2c JW |
1694 | ? gen_extendqisi2(dest, gen_lowpart (QImode, dest)) |
1695 | : gen_extendhisi2(dest, gen_lowpart (HImode, dest))); | |
8d481241 JW |
1696 | if (kind <= 2) |
1697 | { | |
afad3d2c JW |
1698 | if (shift2) |
1699 | { | |
1700 | operands[2] = GEN_INT (shift2); | |
1701 | gen_shifty_op (ASHIFT, operands); | |
1702 | } | |
8d481241 JW |
1703 | } |
1704 | else | |
1705 | { | |
afad3d2c | 1706 | if (shift2 > 0) |
8d481241 | 1707 | { |
ae9d19c0 JR |
1708 | if (EXT_SHIFT_SIGNED (shift2)) |
1709 | { | |
1710 | operands[2] = GEN_INT (shift2 + 1); | |
1711 | gen_shifty_op (ASHIFT, operands); | |
1712 | operands[2] = GEN_INT (1); | |
1713 | gen_shifty_op (ASHIFTRT, operands); | |
1714 | break; | |
1715 | } | |
8d481241 JW |
1716 | operands[2] = GEN_INT (shift2); |
1717 | gen_shifty_hi_op (ASHIFT, operands); | |
1718 | } | |
afad3d2c | 1719 | else if (shift2) |
8d481241 JW |
1720 | { |
1721 | operands[2] = GEN_INT (-shift2); | |
1722 | gen_shifty_hi_op (LSHIFTRT, operands); | |
1723 | } | |
1724 | emit_insn (size <= 8 | |
afad3d2c JW |
1725 | ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest)) |
1726 | : gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); | |
8d481241 JW |
1727 | } |
1728 | break; | |
1729 | } | |
1730 | case 5: | |
afad3d2c JW |
1731 | { |
1732 | int i = 16 - size; | |
913d8e13 R |
1733 | if (! rtx_equal_function_value_matters |
1734 | && ! reload_in_progress && ! reload_completed) | |
1735 | emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); | |
1736 | else | |
1737 | { | |
1738 | operands[0] = dest; | |
1739 | operands[2] = GEN_INT (16 - insize); | |
1740 | gen_shifty_hi_op (ASHIFT, operands); | |
1741 | emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); | |
1742 | } | |
afad3d2c JW |
1743 | /* Don't use gen_ashrsi3 because it generates new pseudos. */ |
1744 | while (--i >= 0) | |
1745 | gen_ashift (ASHIFTRT, 1, dest); | |
1746 | break; | |
1747 | } | |
8d481241 JW |
1748 | case 6: |
1749 | case 7: | |
1750 | /* Don't expand fine-grained when combining, because that will | |
1751 | make the pattern fail. */ | |
1752 | if (! rtx_equal_function_value_matters | |
1753 | && ! reload_in_progress && ! reload_completed) | |
1754 | { | |
1755 | emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); | |
1756 | emit_insn (gen_movsi (dest, source)); | |
1757 | break; | |
1758 | } | |
1759 | emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1))); | |
1760 | emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1)))); | |
1761 | emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1)))); | |
1762 | operands[0] = dest; | |
1763 | operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx; | |
1764 | gen_shifty_op (ASHIFT, operands); | |
1765 | if (kind == 7) | |
1766 | emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1))); | |
1767 | break; | |
1768 | default: | |
1769 | return -1; | |
1770 | } | |
1771 | return 0; | |
1772 | } | |
8aa2a305 JW |
1773 | \f |
1774 | /* The SH cannot load a large constant into a register, constants have to | |
1775 | come from a pc relative load. The reference of a pc relative load | |
1776 | instruction must be less than 1k in front of the instruction. This | |
1777 | means that we often have to dump a constant inside a function, and | |
1778 | generate code to branch around it. | |
bc45ade3 | 1779 | |
8aa2a305 JW |
1780 | It is important to minimize this, since the branches will slow things |
1781 | down and make things bigger. | |
1782 | ||
1783 | Worst case code looks like: | |
1784 | ||
1785 | mov.l L1,rn | |
1786 | bra L2 | |
1787 | nop | |
1788 | align | |
1789 | L1: .long value | |
1790 | L2: | |
1791 | .. | |
1792 | ||
1793 | mov.l L3,rn | |
1794 | bra L4 | |
1795 | nop | |
1796 | align | |
1797 | L3: .long value | |
1798 | L4: | |
1799 | .. | |
1800 | ||
1801 | We fix this by performing a scan before scheduling, which notices which | |
1802 | instructions need to have their operands fetched from the constant table | |
1803 | and builds the table. | |
1804 | ||
1805 | The algorithm is: | |
1806 | ||
1807 | scan, find an instruction which needs a pcrel move. Look forward, find the | |
1808 | last barrier which is within MAX_COUNT bytes of the requirement. | |
1809 | If there isn't one, make one. Process all the instructions between | |
1810 | the find and the barrier. | |
1811 | ||
1812 | In the above example, we can tell that L3 is within 1k of L1, so | |
1813 | the first move can be shrunk from the 3 insn+constant sequence into | |
1814 | just 1 insn, and the constant moved to L3 to make: | |
1815 | ||
1816 | mov.l L1,rn | |
1817 | .. | |
1818 | mov.l L3,rn | |
1819 | bra L4 | |
1820 | nop | |
1821 | align | |
1822 | L3:.long value | |
1823 | L4:.long value | |
1824 | ||
1825 | Then the second move becomes the target for the shortening process. */ | |
1826 | ||
1827 | typedef struct | |
1828 | { | |
1829 | rtx value; /* Value in table. */ | |
1830 | rtx label; /* Label of value. */ | |
1831 | enum machine_mode mode; /* Mode of value. */ | |
1832 | } pool_node; | |
1833 | ||
1834 | /* The maximum number of constants that can fit into one pool, since | |
1835 | the pc relative range is 0...1020 bytes and constants are at least 4 | |
1836 | bytes long. */ | |
1837 | ||
1838 | #define MAX_POOL_SIZE (1020/4) | |
1839 | static pool_node pool_vector[MAX_POOL_SIZE]; | |
1840 | static int pool_size; | |
1841 | ||
1842 | /* ??? If we need a constant in HImode which is the truncated value of a | |
1843 | constant we need in SImode, we could combine the two entries thus saving | |
1844 | two bytes. Is this common enough to be worth the effort of implementing | |
1845 | it? */ | |
1846 | ||
1847 | /* ??? This stuff should be done at the same time that we shorten branches. | |
1848 | As it is now, we must assume that all branches are the maximum size, and | |
1849 | this causes us to almost always output constant pools sooner than | |
1850 | necessary. */ | |
1851 | ||
1852 | /* Add a constant to the pool and return its label. */ | |
1853 | ||
1854 | static rtx | |
225e4f43 R |
1855 | add_constant (x, mode, last_value) |
1856 | rtx last_value; | |
8aa2a305 JW |
1857 | rtx x; |
1858 | enum machine_mode mode; | |
0d7e008e SC |
1859 | { |
1860 | int i; | |
8aa2a305 JW |
1861 | rtx lab; |
1862 | ||
1863 | /* First see if we've already got it. */ | |
1864 | for (i = 0; i < pool_size; i++) | |
0d7e008e | 1865 | { |
8aa2a305 JW |
1866 | if (x->code == pool_vector[i].value->code |
1867 | && mode == pool_vector[i].mode) | |
0d7e008e | 1868 | { |
8aa2a305 JW |
1869 | if (x->code == CODE_LABEL) |
1870 | { | |
1871 | if (XINT (x, 3) != XINT (pool_vector[i].value, 3)) | |
1872 | continue; | |
1873 | } | |
1874 | if (rtx_equal_p (x, pool_vector[i].value)) | |
225e4f43 R |
1875 | { |
1876 | lab = 0; | |
1877 | if (! last_value | |
1878 | || ! i | |
1879 | || ! rtx_equal_p (last_value, pool_vector[i-1].value)) | |
1880 | { | |
1881 | lab = pool_vector[i].label; | |
1882 | if (! lab) | |
1883 | pool_vector[i].label = lab = gen_label_rtx (); | |
1884 | } | |
1885 | return lab; | |
1886 | } | |
0d7e008e | 1887 | } |
0d7e008e | 1888 | } |
b9654711 | 1889 | |
8aa2a305 JW |
1890 | /* Need a new one. */ |
1891 | pool_vector[pool_size].value = x; | |
225e4f43 R |
1892 | if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value)) |
1893 | lab = 0; | |
1894 | else | |
1895 | lab = gen_label_rtx (); | |
8aa2a305 JW |
1896 | pool_vector[pool_size].mode = mode; |
1897 | pool_vector[pool_size].label = lab; | |
1898 | pool_size++; | |
1899 | return lab; | |
0d7e008e | 1900 | } |
16bea517 JW |
1901 | |
1902 | /* Output the literal table. */ | |
b9654711 | 1903 | |
b9654711 | 1904 | static void |
0d7e008e SC |
1905 | dump_table (scan) |
1906 | rtx scan; | |
b9654711 | 1907 | { |
0d7e008e | 1908 | int i; |
0d7e008e | 1909 | int need_align = 1; |
b9654711 | 1910 | |
16bea517 | 1911 | /* Do two passes, first time dump out the HI sized constants. */ |
b9654711 | 1912 | |
0d7e008e | 1913 | for (i = 0; i < pool_size; i++) |
b9654711 | 1914 | { |
8aa2a305 JW |
1915 | pool_node *p = &pool_vector[i]; |
1916 | ||
0d7e008e SC |
1917 | if (p->mode == HImode) |
1918 | { | |
1919 | if (need_align) | |
1920 | { | |
1921 | scan = emit_insn_after (gen_align_2 (), scan); | |
1922 | need_align = 0; | |
1923 | } | |
1924 | scan = emit_label_after (p->label, scan); | |
1925 | scan = emit_insn_after (gen_consttable_2 (p->value), scan); | |
1926 | } | |
b9654711 | 1927 | } |
8aa2a305 | 1928 | |
0d7e008e | 1929 | need_align = 1; |
b9654711 | 1930 | |
0d7e008e | 1931 | for (i = 0; i < pool_size; i++) |
b9654711 | 1932 | { |
8aa2a305 | 1933 | pool_node *p = &pool_vector[i]; |
b9654711 | 1934 | |
0d7e008e | 1935 | switch (p->mode) |
b9654711 | 1936 | { |
0d7e008e SC |
1937 | case HImode: |
1938 | break; | |
1939 | case SImode: | |
e577c183 | 1940 | case SFmode: |
0d7e008e | 1941 | if (need_align) |
b9654711 | 1942 | { |
0d7e008e | 1943 | need_align = 0; |
d3ae8277 | 1944 | scan = emit_label_after (gen_label_rtx (), scan); |
0d7e008e | 1945 | scan = emit_insn_after (gen_align_4 (), scan); |
b9654711 | 1946 | } |
1245df60 R |
1947 | if (p->label) |
1948 | scan = emit_label_after (p->label, scan); | |
0d7e008e SC |
1949 | scan = emit_insn_after (gen_consttable_4 (p->value), scan); |
1950 | break; | |
e577c183 | 1951 | case DFmode: |
0d7e008e SC |
1952 | case DImode: |
1953 | if (need_align) | |
1954 | { | |
1955 | need_align = 0; | |
d3ae8277 | 1956 | scan = emit_label_after (gen_label_rtx (), scan); |
0d7e008e SC |
1957 | scan = emit_insn_after (gen_align_4 (), scan); |
1958 | } | |
1245df60 R |
1959 | if (p->label) |
1960 | scan = emit_label_after (p->label, scan); | |
0d7e008e SC |
1961 | scan = emit_insn_after (gen_consttable_8 (p->value), scan); |
1962 | break; | |
1963 | default: | |
1964 | abort (); | |
1965 | break; | |
b9654711 SC |
1966 | } |
1967 | } | |
b9654711 | 1968 | |
0d7e008e SC |
1969 | scan = emit_insn_after (gen_consttable_end (), scan); |
1970 | scan = emit_barrier_after (scan); | |
1971 | pool_size = 0; | |
1972 | } | |
b9654711 | 1973 | |
8aa2a305 JW |
1974 | /* Return non-zero if constant would be an ok source for a |
1975 | mov.w instead of a mov.l. */ | |
b9654711 | 1976 | |
16bea517 | 1977 | static int |
8aa2a305 | 1978 | hi_const (src) |
0d7e008e | 1979 | rtx src; |
0d7e008e | 1980 | { |
8aa2a305 JW |
1981 | return (GET_CODE (src) == CONST_INT |
1982 | && INTVAL (src) >= -32768 | |
1983 | && INTVAL (src) <= 32767); | |
b9654711 SC |
1984 | } |
1985 | ||
8aa2a305 JW |
1986 | /* Non-zero if the insn is a move instruction which needs to be fixed. */ |
1987 | ||
1988 | /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the | |
1989 | CONST_DOUBLE input value is CONST_OK_FOR_I. For a SFmode move, we don't | |
1990 | need to fix it if the input value is CONST_OK_FOR_I. */ | |
1991 | ||
1992 | static int | |
1993 | broken_move (insn) | |
1994 | rtx insn; | |
b9654711 | 1995 | { |
e577c183 JR |
1996 | if (GET_CODE (insn) == INSN) |
1997 | { | |
1998 | rtx pat = PATTERN (insn); | |
1999 | if (GET_CODE (pat) == PARALLEL) | |
2000 | pat = XVECEXP (pat, 0, 0); | |
2001 | if (GET_CODE (pat) == SET | |
2002 | /* We can load any 8 bit value if we don't care what the high | |
2003 | order bits end up as. */ | |
2004 | && GET_MODE (SET_DEST (pat)) != QImode | |
2005 | && CONSTANT_P (SET_SRC (pat)) | |
1245df60 R |
2006 | && ! (TARGET_SH3E |
2007 | && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE | |
e577c183 JR |
2008 | && (fp_zero_operand (SET_SRC (pat)) |
2009 | || fp_one_operand (SET_SRC (pat))) | |
2010 | && GET_CODE (SET_DEST (pat)) == REG | |
2011 | && REGNO (SET_DEST (pat)) >= FIRST_FP_REG | |
2012 | && REGNO (SET_DEST (pat)) <= LAST_FP_REG) | |
2013 | && (GET_CODE (SET_SRC (pat)) != CONST_INT | |
2014 | || ! CONST_OK_FOR_I (INTVAL (SET_SRC (pat))))) | |
2015 | return 1; | |
2016 | } | |
d3ae8277 | 2017 | |
8aa2a305 | 2018 | return 0; |
b9654711 | 2019 | } |
b9654711 | 2020 | |
1245df60 R |
2021 | static int |
2022 | mova_p (insn) | |
2023 | rtx insn; | |
2024 | { | |
2025 | return (GET_CODE (insn) == INSN | |
2026 | && GET_CODE (PATTERN (insn)) == SET | |
2027 | && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC | |
2028 | && XINT (SET_SRC (PATTERN (insn)), 1) == 1); | |
2029 | } | |
2030 | ||
8aa2a305 JW |
2031 | /* Find the last barrier from insn FROM which is close enough to hold the |
2032 | constant pool. If we can't find one, then create one near the end of | |
2033 | the range. */ | |
b9654711 | 2034 | |
8aa2a305 | 2035 | static rtx |
1245df60 R |
2036 | find_barrier (num_mova, mova, from) |
2037 | int num_mova; | |
2038 | rtx mova, from; | |
b9654711 | 2039 | { |
0d7e008e SC |
2040 | int count_si = 0; |
2041 | int count_hi = 0; | |
2042 | int found_hi = 0; | |
2043 | int found_si = 0; | |
33f7f353 JR |
2044 | int hi_align = 2; |
2045 | int si_align = 2; | |
1245df60 R |
2046 | int leading_mova = num_mova; |
2047 | rtx barrier_before_mova, found_barrier = 0, good_barrier = 0; | |
c17f53a8 JW |
2048 | int si_limit; |
2049 | int hi_limit; | |
8aa2a305 JW |
2050 | |
2051 | /* For HImode: range is 510, add 4 because pc counts from address of | |
2052 | second instruction after this one, subtract 2 for the jump instruction | |
3adbde60 JW |
2053 | that we may need to emit before the table, subtract 2 for the instruction |
2054 | that fills the jump delay slot (in very rare cases, reorg will take an | |
2055 | instruction from after the constant pool or will leave the delay slot | |
2056 | empty). This gives 510. | |
8aa2a305 JW |
2057 | For SImode: range is 1020, add 4 because pc counts from address of |
2058 | second instruction after this one, subtract 2 in case pc is 2 byte | |
2059 | aligned, subtract 2 for the jump instruction that we may need to emit | |
3adbde60 JW |
2060 | before the table, subtract 2 for the instruction that fills the jump |
2061 | delay slot. This gives 1018. */ | |
c17f53a8 | 2062 | |
1245df60 | 2063 | /* The branch will always be shortened now that the reference address for |
956d6950 | 2064 | forward branches is the successor address, thus we need no longer make |
1245df60 | 2065 | adjustments to the [sh]i_limit for -O0. */ |
c17f53a8 | 2066 | |
1245df60 R |
2067 | si_limit = 1018; |
2068 | hi_limit = 510; | |
e4fa6b06 | 2069 | |
c17f53a8 | 2070 | while (from && count_si < si_limit && count_hi < hi_limit) |
0d7e008e | 2071 | { |
33f7f353 JR |
2072 | int inc = get_attr_length (from); |
2073 | int new_align = 1; | |
1245df60 | 2074 | |
33f7f353 | 2075 | if (GET_CODE (from) == CODE_LABEL) |
77008a44 R |
2076 | { |
2077 | if (optimize) | |
2078 | new_align = 1 << label_to_alignment (from); | |
2079 | else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER) | |
2080 | new_align = 1 << barrier_align (from); | |
2081 | else | |
2082 | new_align = 1; | |
2083 | inc = 0; | |
2084 | } | |
8aa2a305 | 2085 | |
0d7e008e | 2086 | if (GET_CODE (from) == BARRIER) |
1245df60 | 2087 | { |
33f7f353 | 2088 | |
1245df60 | 2089 | found_barrier = from; |
33f7f353 | 2090 | |
956d6950 | 2091 | /* If we are at the end of the function, or in front of an alignment |
1245df60 R |
2092 | instruction, we need not insert an extra alignment. We prefer |
2093 | this kind of barrier. */ | |
33f7f353 | 2094 | if (barrier_align (from) > 2) |
1245df60 R |
2095 | good_barrier = from; |
2096 | } | |
8aa2a305 | 2097 | |
8aa2a305 | 2098 | if (broken_move (from)) |
0d7e008e | 2099 | { |
1245df60 R |
2100 | rtx pat, src, dst; |
2101 | enum machine_mode mode; | |
2102 | ||
2103 | pat = PATTERN (from); | |
2104 | if (GET_CODE (pat) == PARALLEL) | |
2105 | pat = XVECEXP (pat, 0, 0); | |
2106 | src = SET_SRC (pat); | |
2107 | dst = SET_DEST (pat); | |
2108 | mode = GET_MODE (dst); | |
c17f53a8 JW |
2109 | |
2110 | /* We must explicitly check the mode, because sometimes the | |
2111 | front end will generate code to load unsigned constants into | |
2112 | HImode targets without properly sign extending them. */ | |
225e4f43 R |
2113 | if (mode == HImode |
2114 | || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG)) | |
00e94d65 | 2115 | { |
1245df60 | 2116 | found_hi += 2; |
00e94d65 RK |
2117 | /* We put the short constants before the long constants, so |
2118 | we must count the length of short constants in the range | |
2119 | for the long constants. */ | |
2120 | /* ??? This isn't optimal, but is easy to do. */ | |
1245df60 | 2121 | si_limit -= 2; |
00e94d65 | 2122 | } |
0d7e008e | 2123 | else |
1245df60 | 2124 | { |
33f7f353 JR |
2125 | while (si_align > 2 && found_si + si_align - 2 > count_si) |
2126 | si_align >>= 1; | |
1245df60 R |
2127 | if (found_si > count_si) |
2128 | count_si = found_si; | |
2129 | found_si += GET_MODE_SIZE (mode); | |
2130 | if (num_mova) | |
2131 | si_limit -= GET_MODE_SIZE (mode); | |
2132 | } | |
0d7e008e | 2133 | } |
5325c0fa | 2134 | |
33f7f353 | 2135 | if (mova_p (from)) |
1245df60 R |
2136 | { |
2137 | if (! num_mova++) | |
2138 | { | |
2139 | leading_mova = 0; | |
2140 | mova = from; | |
2141 | barrier_before_mova = good_barrier ? good_barrier : found_barrier; | |
2142 | } | |
2143 | if (found_si > count_si) | |
2144 | count_si = found_si; | |
2145 | } | |
5325c0fa JW |
2146 | else if (GET_CODE (from) == JUMP_INSN |
2147 | && (GET_CODE (PATTERN (from)) == ADDR_VEC | |
2148 | || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)) | |
1245df60 R |
2149 | { |
2150 | if (num_mova) | |
2151 | num_mova--; | |
a0798779 | 2152 | if (barrier_align (next_real_insn (from)) == CACHE_LOG) |
1245df60 | 2153 | { |
38e01259 | 2154 | /* We have just passed the barrier in front of the |
a0798779 R |
2155 | ADDR_DIFF_VEC, which is stored in found_barrier. Since |
2156 | the ADDR_DIFF_VEC is accessed as data, just like our pool | |
2157 | constants, this is a good opportunity to accommodate what | |
2158 | we have gathered so far. | |
1245df60 R |
2159 | If we waited any longer, we could end up at a barrier in |
2160 | front of code, which gives worse cache usage for separated | |
2161 | instruction / data caches. */ | |
a0798779 | 2162 | good_barrier = found_barrier; |
1245df60 R |
2163 | break; |
2164 | } | |
a0798779 R |
2165 | else |
2166 | { | |
2167 | rtx body = PATTERN (from); | |
2168 | inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body)); | |
2169 | } | |
1245df60 | 2170 | } |
5325c0fa | 2171 | |
0d7e008e | 2172 | if (found_si) |
33f7f353 JR |
2173 | { |
2174 | if (new_align > si_align) | |
2175 | { | |
1f3fa20e | 2176 | si_limit -= count_si - 1 & new_align - si_align; |
33f7f353 JR |
2177 | si_align = new_align; |
2178 | } | |
1f3fa20e | 2179 | count_si = count_si + new_align - 1 & -new_align; |
33f7f353 JR |
2180 | count_si += inc; |
2181 | } | |
0d7e008e | 2182 | if (found_hi) |
33f7f353 JR |
2183 | { |
2184 | if (new_align > hi_align) | |
2185 | { | |
1f3fa20e | 2186 | hi_limit -= count_hi - 1 & new_align - hi_align; |
33f7f353 JR |
2187 | hi_align = new_align; |
2188 | } | |
1f3fa20e | 2189 | count_hi = count_hi + new_align - 1 & -new_align; |
33f7f353 JR |
2190 | count_hi += inc; |
2191 | } | |
0d7e008e SC |
2192 | from = NEXT_INSN (from); |
2193 | } | |
2194 | ||
1245df60 R |
2195 | if (num_mova) |
2196 | if (leading_mova) | |
2197 | { | |
2198 | /* Try as we might, the leading mova is out of range. Change | |
2199 | it into a load (which will become a pcload) and retry. */ | |
2200 | SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0); | |
2201 | INSN_CODE (mova) = -1; | |
2202 | return find_barrier (0, 0, mova); | |
2203 | } | |
2204 | else | |
2205 | { | |
2206 | /* Insert the constant pool table before the mova instruction, | |
2207 | to prevent the mova label reference from going out of range. */ | |
2208 | from = mova; | |
2209 | good_barrier = found_barrier = barrier_before_mova; | |
2210 | } | |
5325c0fa | 2211 | |
1245df60 R |
2212 | if (found_barrier) |
2213 | { | |
33f7f353 | 2214 | if (good_barrier && next_real_insn (found_barrier)) |
1245df60 | 2215 | found_barrier = good_barrier; |
1245df60 R |
2216 | } |
2217 | else | |
b9654711 | 2218 | { |
8aa2a305 JW |
2219 | /* We didn't find a barrier in time to dump our stuff, |
2220 | so we'll make one. */ | |
0d7e008e | 2221 | rtx label = gen_label_rtx (); |
8aa2a305 | 2222 | |
5dbcc9c0 RK |
2223 | /* If we exceeded the range, then we must back up over the last |
2224 | instruction we looked at. Otherwise, we just need to undo the | |
2225 | NEXT_INSN at the end of the loop. */ | |
c17f53a8 | 2226 | if (count_hi > hi_limit || count_si > si_limit) |
5dbcc9c0 RK |
2227 | from = PREV_INSN (PREV_INSN (from)); |
2228 | else | |
2229 | from = PREV_INSN (from); | |
2230 | ||
8aa2a305 JW |
2231 | /* Walk back to be just before any jump or label. |
2232 | Putting it before a label reduces the number of times the branch | |
2233 | around the constant pool table will be hit. Putting it before | |
2234 | a jump makes it more likely that the bra delay slot will be | |
2235 | filled. */ | |
2236 | while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE | |
8e87e161 | 2237 | || GET_CODE (from) == CODE_LABEL) |
8aa2a305 JW |
2238 | from = PREV_INSN (from); |
2239 | ||
0d7e008e SC |
2240 | from = emit_jump_insn_after (gen_jump (label), from); |
2241 | JUMP_LABEL (from) = label; | |
e081ed26 | 2242 | LABEL_NUSES (label) = 1; |
0d7e008e SC |
2243 | found_barrier = emit_barrier_after (from); |
2244 | emit_label_after (label, found_barrier); | |
b9654711 | 2245 | } |
b9654711 | 2246 | |
8aa2a305 | 2247 | return found_barrier; |
0d7e008e | 2248 | } |
b9654711 | 2249 | |
4787bce0 JW |
2250 | /* If the instruction INSN is implemented by a special function, and we can |
2251 | positively find the register that is used to call the sfunc, and this | |
2252 | register is not used anywhere else in this instruction - except as the | |
2253 | destination of a set, return this register; else, return 0. */ | |
1245df60 | 2254 | rtx |
4787bce0 JW |
2255 | sfunc_uses_reg (insn) |
2256 | rtx insn; | |
2257 | { | |
2258 | int i; | |
2259 | rtx pattern, part, reg_part, reg; | |
2260 | ||
2261 | if (GET_CODE (insn) != INSN) | |
2262 | return 0; | |
2263 | pattern = PATTERN (insn); | |
2264 | if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC) | |
2265 | return 0; | |
2266 | ||
2267 | for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--) | |
2268 | { | |
2269 | part = XVECEXP (pattern, 0, i); | |
1245df60 | 2270 | if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode) |
4787bce0 JW |
2271 | reg_part = part; |
2272 | } | |
2273 | if (! reg_part) | |
2274 | return 0; | |
2275 | reg = XEXP (reg_part, 0); | |
2276 | for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--) | |
2277 | { | |
2278 | part = XVECEXP (pattern, 0, i); | |
225e4f43 | 2279 | if (part == reg_part || GET_CODE (part) == CLOBBER) |
4787bce0 JW |
2280 | continue; |
2281 | if (reg_mentioned_p (reg, ((GET_CODE (part) == SET | |
2282 | && GET_CODE (SET_DEST (part)) == REG) | |
2283 | ? SET_SRC (part) : part))) | |
2284 | return 0; | |
2285 | } | |
2286 | return reg; | |
2287 | } | |
2288 | ||
933c3ba3 JW |
2289 | /* See if the only way in which INSN uses REG is by calling it, or by |
2290 | setting it while calling it. Set *SET to a SET rtx if the register | |
2291 | is set by INSN. */ | |
2292 | ||
2293 | static int | |
2294 | noncall_uses_reg (reg, insn, set) | |
2295 | rtx reg; | |
2296 | rtx insn; | |
2297 | rtx *set; | |
2298 | { | |
4787bce0 | 2299 | rtx pattern, reg2; |
933c3ba3 JW |
2300 | |
2301 | *set = NULL_RTX; | |
2302 | ||
4787bce0 JW |
2303 | reg2 = sfunc_uses_reg (insn); |
2304 | if (reg2 && REGNO (reg2) == REGNO (reg)) | |
2305 | { | |
2306 | pattern = single_set (insn); | |
2307 | if (pattern | |
2308 | && GET_CODE (SET_DEST (pattern)) == REG | |
2309 | && REGNO (reg) == REGNO (SET_DEST (pattern))) | |
2310 | *set = pattern; | |
2311 | return 0; | |
2312 | } | |
933c3ba3 JW |
2313 | if (GET_CODE (insn) != CALL_INSN) |
2314 | { | |
2315 | /* We don't use rtx_equal_p because we don't care if the mode is | |
2316 | different. */ | |
2317 | pattern = single_set (insn); | |
2318 | if (pattern | |
2319 | && GET_CODE (SET_DEST (pattern)) == REG | |
2320 | && REGNO (reg) == REGNO (SET_DEST (pattern))) | |
2321 | { | |
4787bce0 JW |
2322 | rtx par, part; |
2323 | int i; | |
2324 | ||
933c3ba3 | 2325 | *set = pattern; |
4787bce0 JW |
2326 | par = PATTERN (insn); |
2327 | if (GET_CODE (par) == PARALLEL) | |
2328 | for (i = XVECLEN (par, 0) - 1; i >= 0; i--) | |
2329 | { | |
2330 | part = XVECEXP (par, 0, i); | |
2331 | if (GET_CODE (part) != SET && reg_mentioned_p (reg, part)) | |
2332 | return 1; | |
2333 | } | |
2334 | return reg_mentioned_p (reg, SET_SRC (pattern)); | |
933c3ba3 JW |
2335 | } |
2336 | ||
2337 | return 1; | |
2338 | } | |
2339 | ||
2340 | pattern = PATTERN (insn); | |
2341 | ||
2342 | if (GET_CODE (pattern) == PARALLEL) | |
2343 | { | |
2344 | int i; | |
2345 | ||
2346 | for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--) | |
2347 | if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i))) | |
2348 | return 1; | |
2349 | pattern = XVECEXP (pattern, 0, 0); | |
2350 | } | |
2351 | ||
2352 | if (GET_CODE (pattern) == SET) | |
2353 | { | |
2354 | if (reg_mentioned_p (reg, SET_DEST (pattern))) | |
2355 | { | |
2356 | /* We don't use rtx_equal_p, because we don't care if the | |
2357 | mode is different. */ | |
2358 | if (GET_CODE (SET_DEST (pattern)) != REG | |
2359 | || REGNO (reg) != REGNO (SET_DEST (pattern))) | |
2360 | return 1; | |
2361 | ||
2362 | *set = pattern; | |
2363 | } | |
2364 | ||
2365 | pattern = SET_SRC (pattern); | |
2366 | } | |
2367 | ||
2368 | if (GET_CODE (pattern) != CALL | |
2369 | || GET_CODE (XEXP (pattern, 0)) != MEM | |
2370 | || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0))) | |
2371 | return 1; | |
2372 | ||
2373 | return 0; | |
2374 | } | |
2375 | ||
1245df60 R |
2376 | /* Given a X, a pattern of an insn or a part of it, return a mask of used |
2377 | general registers. Bits 0..15 mean that the respective registers | |
2378 | are used as inputs in the instruction. Bits 16..31 mean that the | |
2379 | registers 0..15, respectively, are used as outputs, or are clobbered. | |
2380 | IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */ | |
2381 | int | |
2382 | regs_used (x, is_dest) | |
2383 | rtx x; int is_dest; | |
2384 | { | |
2385 | enum rtx_code code; | |
6f7d635c | 2386 | const char *fmt; |
1245df60 R |
2387 | int i, used = 0; |
2388 | ||
2389 | if (! x) | |
2390 | return used; | |
2391 | code = GET_CODE (x); | |
2392 | switch (code) | |
2393 | { | |
2394 | case REG: | |
2395 | if (REGNO (x) < 16) | |
2396 | return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1) | |
2397 | << (REGNO (x) + is_dest)); | |
2398 | return 0; | |
2399 | case SUBREG: | |
2400 | { | |
2401 | rtx y = SUBREG_REG (x); | |
2402 | ||
2403 | if (GET_CODE (y) != REG) | |
2404 | break; | |
2405 | if (REGNO (y) < 16) | |
2406 | return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1) | |
2407 | << (REGNO (y) + SUBREG_WORD (x) + is_dest)); | |
2408 | return 0; | |
2409 | } | |
2410 | case SET: | |
2411 | return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16); | |
2412 | case RETURN: | |
2413 | /* If there was a return value, it must have been indicated with USE. */ | |
2414 | return 0x00ffff00; | |
2415 | case CLOBBER: | |
2416 | is_dest = 1; | |
2417 | break; | |
2418 | case MEM: | |
2419 | is_dest = 0; | |
2420 | break; | |
2421 | case CALL: | |
2422 | used |= 0x00ff00f0; | |
2423 | break; | |
2424 | } | |
2425 | ||
2426 | fmt = GET_RTX_FORMAT (code); | |
2427 | ||
2428 | for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) | |
2429 | { | |
2430 | if (fmt[i] == 'E') | |
2431 | { | |
2432 | register int j; | |
2433 | for (j = XVECLEN (x, i) - 1; j >= 0; j--) | |
2434 | used |= regs_used (XVECEXP (x, i, j), is_dest); | |
2435 | } | |
2436 | else if (fmt[i] == 'e') | |
2437 | used |= regs_used (XEXP (x, i), is_dest); | |
2438 | } | |
2439 | return used; | |
2440 | } | |
2441 | ||
2442 | /* Create an instruction that prevents redirection of a conditional branch | |
956d6950 | 2443 | to the destination of the JUMP with address ADDR. |
1245df60 R |
2444 | If the branch needs to be implemented as an indirect jump, try to find |
2445 | a scratch register for it. | |
2446 | If NEED_BLOCK is 0, don't do anything unless we need a scratch register. | |
2447 | If any preceding insn that doesn't fit into a delay slot is good enough, | |
2448 | pass 1. Pass 2 if a definite blocking insn is needed. | |
2449 | -1 is used internally to avoid deep recursion. | |
2450 | If a blocking instruction is made or recognized, return it. */ | |
2451 | ||
2452 | static rtx | |
2453 | gen_block_redirect (jump, addr, need_block) | |
2454 | rtx jump; | |
2455 | int addr, need_block; | |
2456 | { | |
2457 | int dead = 0; | |
2458 | rtx prev = prev_nonnote_insn (jump); | |
2459 | rtx dest; | |
2460 | ||
2461 | /* First, check if we already have an instruction that satisfies our need. */ | |
2462 | if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev)) | |
2463 | { | |
2464 | if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch) | |
2465 | return prev; | |
2466 | if (GET_CODE (PATTERN (prev)) == USE | |
2467 | || GET_CODE (PATTERN (prev)) == CLOBBER | |
2468 | || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES) | |
2469 | prev = jump; | |
2470 | else if ((need_block &= ~1) < 0) | |
2471 | return prev; | |
2472 | else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect) | |
2473 | need_block = 0; | |
2474 | } | |
2475 | /* We can't use JUMP_LABEL here because it might be undefined | |
2476 | when not optimizing. */ | |
2477 | dest = XEXP (SET_SRC (PATTERN (jump)), 0); | |
2478 | /* If the branch is out of range, try to find a scratch register for it. */ | |
2479 | if (optimize | |
2480 | && (insn_addresses[INSN_UID (dest)] - addr + 4092U > 4092 + 4098)) | |
2481 | { | |
2482 | rtx scan; | |
2483 | /* Don't look for the stack pointer as a scratch register, | |
956d6950 | 2484 | it would cause trouble if an interrupt occurred. */ |
1245df60 R |
2485 | unsigned try = 0x7fff, used; |
2486 | int jump_left = flag_expensive_optimizations + 1; | |
2487 | ||
2488 | /* It is likely that the most recent eligible instruction is wanted for | |
2489 | the delay slot. Therefore, find out which registers it uses, and | |
2490 | try to avoid using them. */ | |
2491 | ||
2492 | for (scan = jump; scan = PREV_INSN (scan); ) | |
2493 | { | |
2494 | enum rtx_code code; | |
2495 | ||
2496 | if (INSN_DELETED_P (scan)) | |
2497 | continue; | |
2498 | code = GET_CODE (scan); | |
2499 | if (code == CODE_LABEL || code == JUMP_INSN) | |
2500 | break; | |
2501 | if (code == INSN | |
2502 | && GET_CODE (PATTERN (scan)) != USE | |
2503 | && GET_CODE (PATTERN (scan)) != CLOBBER | |
2504 | && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES) | |
2505 | { | |
2506 | try &= ~regs_used (PATTERN (scan), 0); | |
2507 | break; | |
2508 | } | |
2509 | } | |
2510 | for (used = dead = 0, scan = JUMP_LABEL (jump); scan = NEXT_INSN (scan); ) | |
2511 | { | |
2512 | enum rtx_code code; | |
2513 | ||
2514 | if (INSN_DELETED_P (scan)) | |
2515 | continue; | |
2516 | code = GET_CODE (scan); | |
2517 | if (GET_RTX_CLASS (code) == 'i') | |
2518 | { | |
2519 | used |= regs_used (PATTERN (scan), 0); | |
2520 | if (code == CALL_INSN) | |
2521 | used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0); | |
2522 | dead |= (used >> 16) & ~used; | |
2523 | if (dead & try) | |
2524 | { | |
2525 | dead &= try; | |
2526 | break; | |
2527 | } | |
2528 | if (code == JUMP_INSN) | |
2529 | if (jump_left-- && simplejump_p (scan)) | |
2530 | scan = JUMP_LABEL (scan); | |
2531 | else | |
2532 | break; | |
2533 | } | |
2534 | } | |
2535 | /* Mask out the stack pointer again, in case it was | |
2536 | the only 'free' register we have found. */ | |
2537 | dead &= 0x7fff; | |
2538 | } | |
2539 | /* If the immediate destination is still in range, check for possible | |
2540 | threading with a jump beyond the delay slot insn. | |
2541 | Don't check if we are called recursively; the jump has been or will be | |
956d6950 | 2542 | checked in a different invocation then. */ |
1245df60 R |
2543 | |
2544 | else if (optimize && need_block >= 0) | |
2545 | { | |
2546 | rtx next = next_active_insn (next_active_insn (dest)); | |
2547 | if (next && GET_CODE (next) == JUMP_INSN | |
2548 | && GET_CODE (PATTERN (next)) == SET | |
2549 | && recog_memoized (next) == CODE_FOR_jump) | |
2550 | { | |
2551 | dest = JUMP_LABEL (next); | |
2552 | if (dest | |
2553 | && insn_addresses[INSN_UID (dest)] - addr + 4092U > 4092 + 4098) | |
2554 | gen_block_redirect (next, insn_addresses[INSN_UID (next)], -1); | |
2555 | } | |
2556 | } | |
2557 | ||
2558 | if (dead) | |
2559 | { | |
c5c76735 | 2560 | rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead)); |
1245df60 R |
2561 | |
2562 | /* It would be nice if we could convert the jump into an indirect | |
956d6950 | 2563 | jump / far branch right now, and thus exposing all constituent |
1245df60 R |
2564 | instructions to further optimization. However, reorg uses |
2565 | simplejump_p to determine if there is an unconditional jump where | |
2566 | it should try to schedule instructions from the target of the | |
2567 | branch; simplejump_p fails for indirect jumps even if they have | |
2568 | a JUMP_LABEL. */ | |
2569 | rtx insn = emit_insn_before (gen_indirect_jump_scratch | |
2570 | (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump)))) | |
2571 | , jump); | |
2572 | INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch; | |
2573 | return insn; | |
2574 | } | |
2575 | else if (need_block) | |
2576 | /* We can't use JUMP_LABEL here because it might be undefined | |
2577 | when not optimizing. */ | |
2578 | return emit_insn_before (gen_block_branch_redirect | |
2579 | (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0)))) | |
2580 | , jump); | |
2581 | return prev; | |
2582 | } | |
2583 | ||
2584 | #define CONDJUMP_MIN -252 | |
2585 | #define CONDJUMP_MAX 262 | |
2586 | struct far_branch | |
2587 | { | |
2588 | /* A label (to be placed) in front of the jump | |
2589 | that jumps to our ultimate destination. */ | |
2590 | rtx near_label; | |
2591 | /* Where we are going to insert it if we cannot move the jump any farther, | |
2592 | or the jump itself if we have picked up an existing jump. */ | |
2593 | rtx insert_place; | |
2594 | /* The ultimate destination. */ | |
2595 | rtx far_label; | |
2596 | struct far_branch *prev; | |
2597 | /* If the branch has already been created, its address; | |
2598 | else the address of its first prospective user. */ | |
2599 | int address; | |
2600 | }; | |
2601 | ||
2602 | enum mdep_reorg_phase_e mdep_reorg_phase; | |
2603 | void | |
2604 | gen_far_branch (bp) | |
2605 | struct far_branch *bp; | |
2606 | { | |
2607 | rtx insn = bp->insert_place; | |
2608 | rtx jump; | |
2609 | rtx label = gen_label_rtx (); | |
2610 | ||
2611 | emit_label_after (label, insn); | |
2612 | if (bp->far_label) | |
2613 | { | |
2614 | jump = emit_jump_insn_after (gen_jump (bp->far_label), insn); | |
2615 | LABEL_NUSES (bp->far_label)++; | |
2616 | } | |
2617 | else | |
2618 | jump = emit_jump_insn_after (gen_return (), insn); | |
225e4f43 R |
2619 | /* Emit a barrier so that reorg knows that any following instructions |
2620 | are not reachable via a fall-through path. | |
2621 | But don't do this when not optimizing, since we wouldn't supress the | |
2622 | alignment for the barrier then, and could end up with out-of-range | |
2623 | pc-relative loads. */ | |
2624 | if (optimize) | |
2625 | emit_barrier_after (jump); | |
1245df60 R |
2626 | emit_label_after (bp->near_label, insn); |
2627 | JUMP_LABEL (jump) = bp->far_label; | |
2628 | if (! invert_jump (insn, label)) | |
2629 | abort (); | |
2630 | /* Prevent reorg from undoing our splits. */ | |
2631 | gen_block_redirect (jump, bp->address += 2, 2); | |
2632 | } | |
2633 | ||
1245df60 R |
2634 | /* Fix up ADDR_DIFF_VECs. */ |
2635 | void | |
2636 | fixup_addr_diff_vecs (first) | |
2637 | rtx first; | |
2638 | { | |
2639 | rtx insn; | |
33f7f353 | 2640 | |
1245df60 R |
2641 | for (insn = first; insn; insn = NEXT_INSN (insn)) |
2642 | { | |
eb3881bf | 2643 | rtx vec_lab, pat, prev, prevpat, x, braf_label; |
1245df60 R |
2644 | |
2645 | if (GET_CODE (insn) != JUMP_INSN | |
2646 | || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC) | |
2647 | continue; | |
2648 | pat = PATTERN (insn); | |
33f7f353 | 2649 | vec_lab = XEXP (XEXP (pat, 0), 0); |
1245df60 | 2650 | |
33f7f353 JR |
2651 | /* Search the matching casesi_jump_2. */ |
2652 | for (prev = vec_lab; ; prev = PREV_INSN (prev)) | |
1245df60 | 2653 | { |
33f7f353 JR |
2654 | if (GET_CODE (prev) != JUMP_INSN) |
2655 | continue; | |
2656 | prevpat = PATTERN (prev); | |
2657 | if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2) | |
2658 | continue; | |
2659 | x = XVECEXP (prevpat, 0, 1); | |
2660 | if (GET_CODE (x) != USE) | |
2661 | continue; | |
2662 | x = XEXP (x, 0); | |
2663 | if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab) | |
2664 | break; | |
1245df60 | 2665 | } |
eb3881bf R |
2666 | |
2667 | /* Emit the reference label of the braf where it belongs, right after | |
2668 | the casesi_jump_2 (i.e. braf). */ | |
2669 | braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0); | |
2670 | emit_label_after (braf_label, prev); | |
2671 | ||
33f7f353 JR |
2672 | /* Fix up the ADDR_DIF_VEC to be relative |
2673 | to the reference address of the braf. */ | |
eb3881bf | 2674 | XEXP (XEXP (pat, 0), 0) = braf_label; |
1245df60 | 2675 | } |
1245df60 R |
2676 | } |
2677 | ||
33f7f353 JR |
2678 | /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following |
2679 | a barrier. Return the base 2 logarithm of the desired alignment. */ | |
1245df60 | 2680 | int |
33f7f353 JR |
2681 | barrier_align (barrier_or_label) |
2682 | rtx barrier_or_label; | |
1245df60 | 2683 | { |
33f7f353 JR |
2684 | rtx next = next_real_insn (barrier_or_label), pat, prev; |
2685 | int slot, credit; | |
2686 | ||
2687 | if (! next) | |
1245df60 | 2688 | return 0; |
1245df60 | 2689 | |
33f7f353 | 2690 | pat = PATTERN (next); |
1245df60 | 2691 | |
33f7f353 JR |
2692 | if (GET_CODE (pat) == ADDR_DIFF_VEC) |
2693 | return 2; | |
1245df60 | 2694 | |
33f7f353 JR |
2695 | if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 1) |
2696 | /* This is a barrier in front of a constant table. */ | |
2697 | return 0; | |
2698 | ||
2699 | prev = prev_real_insn (barrier_or_label); | |
2700 | if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC) | |
1245df60 | 2701 | { |
33f7f353 JR |
2702 | pat = PATTERN (prev); |
2703 | /* If this is a very small table, we want to keep the alignment after | |
2704 | the table to the minimum for proper code alignment. */ | |
2705 | return ((TARGET_SMALLCODE | |
2706 | || (XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat)) | |
2707 | <= 1 << (CACHE_LOG - 2))) | |
2708 | ? 1 : CACHE_LOG); | |
1245df60 | 2709 | } |
33f7f353 JR |
2710 | |
2711 | if (TARGET_SMALLCODE) | |
2712 | return 0; | |
2713 | ||
4d070fd3 | 2714 | if (! TARGET_SH2 || ! optimize) |
33f7f353 JR |
2715 | return CACHE_LOG; |
2716 | ||
225e4f43 R |
2717 | /* When fixing up pcloads, a constant table might be inserted just before |
2718 | the basic block that ends with the barrier. Thus, we can't trust the | |
2719 | instruction lengths before that. */ | |
2720 | if (mdep_reorg_phase > SH_FIXUP_PCLOAD) | |
1245df60 | 2721 | { |
225e4f43 R |
2722 | /* Check if there is an immediately preceding branch to the insn beyond |
2723 | the barrier. We must weight the cost of discarding useful information | |
2724 | from the current cache line when executing this branch and there is | |
2725 | an alignment, against that of fetching unneeded insn in front of the | |
2726 | branch target when there is no alignment. */ | |
2727 | ||
2728 | /* PREV is presumed to be the JUMP_INSN for the barrier under | |
2729 | investigation. Skip to the insn before it. */ | |
2730 | prev = prev_real_insn (prev); | |
2731 | ||
2732 | for (slot = 2, credit = 1 << (CACHE_LOG - 2) + 2; | |
2733 | credit >= 0 && prev && GET_CODE (prev) == INSN; | |
2734 | prev = prev_real_insn (prev)) | |
2735 | { | |
2736 | if (GET_CODE (PATTERN (prev)) == USE | |
2737 | || GET_CODE (PATTERN (prev)) == CLOBBER) | |
2738 | continue; | |
2739 | if (GET_CODE (PATTERN (prev)) == SEQUENCE) | |
2740 | prev = XVECEXP (PATTERN (prev), 0, 1); | |
2741 | if (slot && | |
2742 | get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES) | |
2743 | slot = 0; | |
2744 | credit -= get_attr_length (prev); | |
2745 | } | |
2746 | if (prev | |
2747 | && GET_CODE (prev) == JUMP_INSN | |
2748 | && JUMP_LABEL (prev) | |
2749 | && next_real_insn (JUMP_LABEL (prev)) == next_real_insn (barrier_or_label) | |
2750 | && (credit - slot >= (GET_CODE (SET_SRC (PATTERN (prev))) == PC ? 2 : 0))) | |
2751 | return 0; | |
33f7f353 | 2752 | } |
33f7f353 JR |
2753 | |
2754 | return CACHE_LOG; | |
1245df60 R |
2755 | } |
2756 | ||
8aa2a305 | 2757 | /* Exported to toplev.c. |
b9654711 | 2758 | |
933c3ba3 JW |
2759 | Do a final pass over the function, just before delayed branch |
2760 | scheduling. */ | |
b9654711 | 2761 | |
0d7e008e SC |
2762 | void |
2763 | machine_dependent_reorg (first) | |
2764 | rtx first; | |
2765 | { | |
1245df60 R |
2766 | rtx insn, mova; |
2767 | int num_mova; | |
c5c76735 JL |
2768 | rtx r0_rtx = gen_rtx_REG (Pmode, 0); |
2769 | rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx); | |
8aa2a305 | 2770 | |
933c3ba3 JW |
2771 | /* If relaxing, generate pseudo-ops to associate function calls with |
2772 | the symbols they call. It does no harm to not generate these | |
2773 | pseudo-ops. However, when we can generate them, it enables to | |
2774 | linker to potentially relax the jsr to a bsr, and eliminate the | |
2775 | register load and, possibly, the constant pool entry. */ | |
2776 | ||
1245df60 | 2777 | mdep_reorg_phase = SH_INSERT_USES_LABELS; |
933c3ba3 JW |
2778 | if (TARGET_RELAX) |
2779 | { | |
2780 | /* Remove all REG_LABEL notes. We want to use them for our own | |
2781 | purposes. This works because none of the remaining passes | |
2782 | need to look at them. | |
2783 | ||
2784 | ??? But it may break in the future. We should use a machine | |
2785 | dependent REG_NOTE, or some other approach entirely. */ | |
2786 | for (insn = first; insn; insn = NEXT_INSN (insn)) | |
2787 | { | |
2788 | if (GET_RTX_CLASS (GET_CODE (insn)) == 'i') | |
2789 | { | |
2790 | rtx note; | |
2791 | ||
2792 | while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0) | |
2793 | remove_note (insn, note); | |
2794 | } | |
2795 | } | |
2796 | ||
2797 | for (insn = first; insn; insn = NEXT_INSN (insn)) | |
2798 | { | |
2799 | rtx pattern, reg, link, set, scan, dies, label; | |
2800 | int rescan = 0, foundinsn = 0; | |
2801 | ||
4787bce0 JW |
2802 | if (GET_CODE (insn) == CALL_INSN) |
2803 | { | |
2804 | pattern = PATTERN (insn); | |
933c3ba3 | 2805 | |
4787bce0 JW |
2806 | if (GET_CODE (pattern) == PARALLEL) |
2807 | pattern = XVECEXP (pattern, 0, 0); | |
2808 | if (GET_CODE (pattern) == SET) | |
2809 | pattern = SET_SRC (pattern); | |
933c3ba3 | 2810 | |
4787bce0 JW |
2811 | if (GET_CODE (pattern) != CALL |
2812 | || GET_CODE (XEXP (pattern, 0)) != MEM) | |
2813 | continue; | |
933c3ba3 | 2814 | |
4787bce0 JW |
2815 | reg = XEXP (XEXP (pattern, 0), 0); |
2816 | } | |
2817 | else | |
2818 | { | |
2819 | reg = sfunc_uses_reg (insn); | |
2820 | if (! reg) | |
2821 | continue; | |
2822 | } | |
933c3ba3 | 2823 | |
933c3ba3 JW |
2824 | if (GET_CODE (reg) != REG) |
2825 | continue; | |
2826 | ||
2827 | /* This is a function call via REG. If the only uses of REG | |
2828 | between the time that it is set and the time that it dies | |
2829 | are in function calls, then we can associate all the | |
2830 | function calls with the setting of REG. */ | |
2831 | ||
2832 | for (link = LOG_LINKS (insn); link; link = XEXP (link, 1)) | |
2833 | { | |
1075deda ILT |
2834 | if (REG_NOTE_KIND (link) != 0) |
2835 | continue; | |
933c3ba3 JW |
2836 | set = single_set (XEXP (link, 0)); |
2837 | if (set && rtx_equal_p (reg, SET_DEST (set))) | |
2838 | { | |
2839 | link = XEXP (link, 0); | |
2840 | break; | |
2841 | } | |
2842 | } | |
2843 | ||
2844 | if (! link) | |
2845 | { | |
2846 | /* ??? Sometimes global register allocation will have | |
2847 | deleted the insn pointed to by LOG_LINKS. Try | |
2848 | scanning backward to find where the register is set. */ | |
2849 | for (scan = PREV_INSN (insn); | |
2850 | scan && GET_CODE (scan) != CODE_LABEL; | |
2851 | scan = PREV_INSN (scan)) | |
2852 | { | |
2853 | if (GET_RTX_CLASS (GET_CODE (scan)) != 'i') | |
2854 | continue; | |
2855 | ||
2856 | if (! reg_mentioned_p (reg, scan)) | |
2857 | continue; | |
2858 | ||
2859 | if (noncall_uses_reg (reg, scan, &set)) | |
2860 | break; | |
2861 | ||
2862 | if (set) | |
2863 | { | |
2864 | link = scan; | |
2865 | break; | |
2866 | } | |
2867 | } | |
2868 | } | |
2869 | ||
2870 | if (! link) | |
2871 | continue; | |
2872 | ||
2873 | /* The register is set at LINK. */ | |
2874 | ||
2875 | /* We can only optimize the function call if the register is | |
2876 | being set to a symbol. In theory, we could sometimes | |
2877 | optimize calls to a constant location, but the assembler | |
2878 | and linker do not support that at present. */ | |
2879 | if (GET_CODE (SET_SRC (set)) != SYMBOL_REF | |
2880 | && GET_CODE (SET_SRC (set)) != LABEL_REF) | |
2881 | continue; | |
2882 | ||
2883 | /* Scan forward from LINK to the place where REG dies, and | |
2884 | make sure that the only insns which use REG are | |
2885 | themselves function calls. */ | |
2886 | ||
c1e8e6bd JW |
2887 | /* ??? This doesn't work for call targets that were allocated |
2888 | by reload, since there may not be a REG_DEAD note for the | |
2889 | register. */ | |
2890 | ||
933c3ba3 JW |
2891 | dies = NULL_RTX; |
2892 | for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan)) | |
2893 | { | |
2894 | rtx scanset; | |
2895 | ||
c1e8e6bd JW |
2896 | /* Don't try to trace forward past a CODE_LABEL if we haven't |
2897 | seen INSN yet. Ordinarily, we will only find the setting insn | |
2898 | in LOG_LINKS if it is in the same basic block. However, | |
2899 | cross-jumping can insert code labels in between the load and | |
2900 | the call, and can result in situations where a single call | |
2901 | insn may have two targets depending on where we came from. */ | |
2902 | ||
2903 | if (GET_CODE (scan) == CODE_LABEL && ! foundinsn) | |
2904 | break; | |
2905 | ||
933c3ba3 JW |
2906 | if (GET_RTX_CLASS (GET_CODE (scan)) != 'i') |
2907 | continue; | |
2908 | ||
2909 | /* Don't try to trace forward past a JUMP. To optimize | |
2910 | safely, we would have to check that all the | |
c1e8e6bd | 2911 | instructions at the jump destination did not use REG. */ |
933c3ba3 | 2912 | |
c1e8e6bd | 2913 | if (GET_CODE (scan) == JUMP_INSN) |
933c3ba3 JW |
2914 | break; |
2915 | ||
2916 | if (! reg_mentioned_p (reg, scan)) | |
2917 | continue; | |
2918 | ||
2919 | if (noncall_uses_reg (reg, scan, &scanset)) | |
2920 | break; | |
2921 | ||
2922 | if (scan == insn) | |
2923 | foundinsn = 1; | |
2924 | ||
4787bce0 JW |
2925 | if (scan != insn |
2926 | && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan))) | |
933c3ba3 JW |
2927 | { |
2928 | /* There is a function call to this register other | |
2929 | than the one we are checking. If we optimize | |
2930 | this call, we need to rescan again below. */ | |
2931 | rescan = 1; | |
2932 | } | |
2933 | ||
2934 | /* ??? We shouldn't have to worry about SCANSET here. | |
2935 | We should just be able to check for a REG_DEAD note | |
2936 | on a function call. However, the REG_DEAD notes are | |
2937 | apparently not dependable around libcalls; c-torture | |
2938 | execute/920501-2 is a test case. If SCANSET is set, | |
2939 | then this insn sets the register, so it must have | |
2940 | died earlier. Unfortunately, this will only handle | |
2941 | the cases in which the register is, in fact, set in a | |
2942 | later insn. */ | |
2943 | ||
2944 | /* ??? We shouldn't have to use FOUNDINSN here. | |
2945 | However, the LOG_LINKS fields are apparently not | |
2946 | entirely reliable around libcalls; | |
2947 | newlib/libm/math/e_pow.c is a test case. Sometimes | |
2948 | an insn will appear in LOG_LINKS even though it is | |
2949 | not the most recent insn which sets the register. */ | |
2950 | ||
2951 | if (foundinsn | |
2952 | && (scanset | |
2953 | || find_reg_note (scan, REG_DEAD, reg))) | |
2954 | { | |
2955 | dies = scan; | |
2956 | break; | |
2957 | } | |
2958 | } | |
2959 | ||
2960 | if (! dies) | |
2961 | { | |
2962 | /* Either there was a branch, or some insn used REG | |
2963 | other than as a function call address. */ | |
2964 | continue; | |
2965 | } | |
2966 | ||
2967 | /* Create a code label, and put it in a REG_LABEL note on | |
2968 | the insn which sets the register, and on each call insn | |
2969 | which uses the register. In final_prescan_insn we look | |
2970 | for the REG_LABEL notes, and output the appropriate label | |
2971 | or pseudo-op. */ | |
2972 | ||
2973 | label = gen_label_rtx (); | |
c5c76735 JL |
2974 | REG_NOTES (link) = gen_rtx_EXPR_LIST (REG_LABEL, label, |
2975 | REG_NOTES (link)); | |
2976 | REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, label, | |
2977 | REG_NOTES (insn)); | |
933c3ba3 JW |
2978 | if (rescan) |
2979 | { | |
2980 | scan = link; | |
2981 | do | |
2982 | { | |
4787bce0 JW |
2983 | rtx reg2; |
2984 | ||
933c3ba3 JW |
2985 | scan = NEXT_INSN (scan); |
2986 | if (scan != insn | |
4787bce0 JW |
2987 | && ((GET_CODE (scan) == CALL_INSN |
2988 | && reg_mentioned_p (reg, scan)) | |
2989 | || ((reg2 = sfunc_uses_reg (scan)) | |
2990 | && REGNO (reg2) == REGNO (reg)))) | |
c5c76735 JL |
2991 | REG_NOTES (scan) |
2992 | = gen_rtx_EXPR_LIST (REG_LABEL, label, REG_NOTES (scan)); | |
933c3ba3 JW |
2993 | } |
2994 | while (scan != dies); | |
2995 | } | |
2996 | } | |
2997 | } | |
2998 | ||
33f7f353 JR |
2999 | if (TARGET_SH2) |
3000 | fixup_addr_diff_vecs (first); | |
1245df60 R |
3001 | |
3002 | if (optimize) | |
3003 | { | |
1245df60 R |
3004 | mdep_reorg_phase = SH_SHORTEN_BRANCHES0; |
3005 | shorten_branches (first); | |
3006 | } | |
933c3ba3 JW |
3007 | /* Scan the function looking for move instructions which have to be |
3008 | changed to pc-relative loads and insert the literal tables. */ | |
3009 | ||
1245df60 R |
3010 | mdep_reorg_phase = SH_FIXUP_PCLOAD; |
3011 | for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn)) | |
0d7e008e | 3012 | { |
1245df60 R |
3013 | if (mova_p (insn)) |
3014 | { | |
3015 | if (! num_mova++) | |
3016 | mova = insn; | |
3017 | } | |
3018 | else if (GET_CODE (insn) == JUMP_INSN | |
3019 | && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC | |
3020 | && num_mova) | |
3021 | { | |
3022 | rtx scan; | |
3023 | int total; | |
3024 | ||
3025 | num_mova--; | |
3026 | ||
3027 | /* Some code might have been inserted between the mova and | |
3028 | its ADDR_DIFF_VEC. Check if the mova is still in range. */ | |
3029 | for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan)) | |
33f7f353 | 3030 | total += get_attr_length (scan); |
1245df60 R |
3031 | |
3032 | /* range of mova is 1020, add 4 because pc counts from address of | |
3033 | second instruction after this one, subtract 2 in case pc is 2 | |
3034 | byte aligned. Possible alignment needed for the ADDR_DIFF_VEC | |
956d6950 | 3035 | cancels out with alignment effects of the mova itself. */ |
1245df60 R |
3036 | if (total > 1022) |
3037 | { | |
3038 | /* Change the mova into a load, and restart scanning | |
3039 | there. broken_move will then return true for mova. */ | |
3040 | SET_SRC (PATTERN (mova)) | |
3041 | = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0); | |
3042 | INSN_CODE (mova) = -1; | |
3043 | insn = mova; | |
3044 | } | |
3045 | } | |
0d7e008e SC |
3046 | if (broken_move (insn)) |
3047 | { | |
0d7e008e | 3048 | rtx scan; |
8aa2a305 JW |
3049 | /* Scan ahead looking for a barrier to stick the constant table |
3050 | behind. */ | |
1245df60 R |
3051 | rtx barrier = find_barrier (num_mova, mova, insn); |
3052 | rtx last_float_move, last_float = 0, *last_float_addr; | |
b9654711 | 3053 | |
1245df60 R |
3054 | if (num_mova && ! mova_p (mova)) |
3055 | { | |
3056 | /* find_barrier had to change the first mova into a | |
3057 | pcload; thus, we have to start with this new pcload. */ | |
3058 | insn = mova; | |
3059 | num_mova = 0; | |
3060 | } | |
16bea517 | 3061 | /* Now find all the moves between the points and modify them. */ |
0d7e008e SC |
3062 | for (scan = insn; scan != barrier; scan = NEXT_INSN (scan)) |
3063 | { | |
1245df60 R |
3064 | if (GET_CODE (scan) == CODE_LABEL) |
3065 | last_float = 0; | |
0d7e008e SC |
3066 | if (broken_move (scan)) |
3067 | { | |
e577c183 JR |
3068 | rtx *patp = &PATTERN (scan), pat = *patp; |
3069 | rtx src, dst; | |
0d7e008e SC |
3070 | rtx lab; |
3071 | rtx newinsn; | |
3072 | rtx newsrc; | |
e577c183 JR |
3073 | enum machine_mode mode; |
3074 | ||
3075 | if (GET_CODE (pat) == PARALLEL) | |
3076 | patp = &XVECEXP (pat, 0, 0), pat = *patp; | |
3077 | src = SET_SRC (pat); | |
3078 | dst = SET_DEST (pat); | |
3079 | mode = GET_MODE (dst); | |
0d7e008e | 3080 | |
225e4f43 R |
3081 | if (mode == SImode && hi_const (src) |
3082 | && REGNO (dst) != FPUL_REG) | |
0d7e008e | 3083 | { |
23ed92ca | 3084 | int offset = 0; |
8aa2a305 | 3085 | |
0d7e008e | 3086 | mode = HImode; |
d3ae8277 | 3087 | while (GET_CODE (dst) == SUBREG) |
23ed92ca JW |
3088 | { |
3089 | offset += SUBREG_WORD (dst); | |
3090 | dst = SUBREG_REG (dst); | |
3091 | } | |
c5c76735 | 3092 | dst = gen_rtx_REG (HImode, REGNO (dst) + offset); |
0d7e008e | 3093 | } |
8aa2a305 | 3094 | |
1245df60 R |
3095 | if (GET_CODE (dst) == REG |
3096 | && ((REGNO (dst) >= FIRST_FP_REG | |
225e4f43 | 3097 | && REGNO (dst) <= LAST_XD_REG) |
1245df60 R |
3098 | || REGNO (dst) == FPUL_REG)) |
3099 | { | |
b8794099 R |
3100 | /* This must be an insn that clobbers r0. */ |
3101 | rtx clobber = XVECEXP (PATTERN (scan), 0, | |
3102 | XVECLEN (PATTERN (scan), 0) - 1); | |
3103 | ||
3104 | if (GET_CODE (clobber) != CLOBBER | |
3105 | || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx)) | |
3106 | abort (); | |
3107 | ||
1245df60 R |
3108 | if (last_float |
3109 | && reg_set_between_p (r0_rtx, last_float_move, scan)) | |
3110 | last_float = 0; | |
3111 | lab = add_constant (src, mode, last_float); | |
3112 | if (lab) | |
3113 | emit_insn_before (gen_mova (lab), scan); | |
3114 | else | |
b8794099 R |
3115 | { |
3116 | /* There will be a REG_UNUSED note for r0 on | |
3117 | LAST_FLOAT_MOVE; we have to change it to REG_INC, | |
3118 | lest reorg:mark_target_live_regs will not | |
3119 | consider r0 to be used, and we end up with delay | |
3120 | slot insn in front of SCAN that clobber r0. */ | |
3121 | rtx note | |
3122 | = find_regno_note (last_float_move, REG_UNUSED, 0); | |
3123 | ||
ab1672a3 JW |
3124 | /* If we are not optimizing, then there may not be |
3125 | a note. */ | |
3126 | if (note) | |
3127 | PUT_MODE (note, REG_INC); | |
b8794099 R |
3128 | |
3129 | *last_float_addr = r0_inc_rtx; | |
3130 | } | |
1245df60 R |
3131 | last_float_move = scan; |
3132 | last_float = src; | |
3133 | newsrc = gen_rtx (MEM, mode, | |
225e4f43 R |
3134 | ((TARGET_SH4 && ! TARGET_FMOVD |
3135 | || REGNO (dst) == FPUL_REG) | |
1245df60 R |
3136 | ? r0_inc_rtx |
3137 | : r0_rtx)); | |
3138 | last_float_addr = &XEXP (newsrc, 0); | |
b8794099 R |
3139 | |
3140 | /* Remove the clobber of r0. */ | |
3141 | XEXP (clobber, 0) = gen_rtx_SCRATCH (Pmode); | |
1245df60 R |
3142 | } |
3143 | else | |
3144 | { | |
3145 | lab = add_constant (src, mode, 0); | |
c5c76735 JL |
3146 | newsrc = gen_rtx_MEM (mode, |
3147 | gen_rtx_LABEL_REF (VOIDmode, lab)); | |
1245df60 | 3148 | } |
8aa2a305 | 3149 | RTX_UNCHANGING_P (newsrc) = 1; |
c5c76735 | 3150 | *patp = gen_rtx_SET (VOIDmode, dst, newsrc); |
e577c183 | 3151 | INSN_CODE (scan) = -1; |
0d7e008e SC |
3152 | } |
3153 | } | |
3154 | dump_table (barrier); | |
1245df60 | 3155 | insn = barrier; |
0d7e008e SC |
3156 | } |
3157 | } | |
1245df60 R |
3158 | |
3159 | mdep_reorg_phase = SH_SHORTEN_BRANCHES1; | |
3160 | insn_addresses = 0; | |
3161 | split_branches (first); | |
3162 | ||
3163 | /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it | |
3164 | also has an effect on the register that holds the addres of the sfunc. | |
3165 | Insert an extra dummy insn in front of each sfunc that pretends to | |
3166 | use this register. */ | |
3167 | if (flag_delayed_branch) | |
3168 | { | |
3169 | for (insn = first; insn; insn = NEXT_INSN (insn)) | |
3170 | { | |
3171 | rtx reg = sfunc_uses_reg (insn); | |
3172 | ||
3173 | if (! reg) | |
3174 | continue; | |
3175 | emit_insn_before (gen_use_sfunc_addr (reg), insn); | |
3176 | } | |
3177 | } | |
225e4f43 R |
3178 | #if 0 |
3179 | /* fpscr is not actually a user variable, but we pretend it is for the | |
3180 | sake of the previous optimization passes, since we want it handled like | |
3181 | one. However, we don't have eny debugging information for it, so turn | |
3182 | it into a non-user variable now. */ | |
3183 | if (TARGET_SH4) | |
3184 | REG_USERVAR_P (get_fpscr_rtx ()) = 0; | |
3185 | #endif | |
3186 | if (optimize) | |
3187 | sh_flag_remove_dead_before_cse = 1; | |
1245df60 R |
3188 | mdep_reorg_phase = SH_AFTER_MDEP_REORG; |
3189 | } | |
3190 | ||
3191 | int | |
3192 | get_dest_uid (label, max_uid) | |
3193 | rtx label; | |
3194 | int max_uid; | |
3195 | { | |
3196 | rtx dest = next_real_insn (label); | |
3197 | int dest_uid; | |
3198 | if (! dest) | |
3199 | /* This can happen for an undefined label. */ | |
3200 | return 0; | |
3201 | dest_uid = INSN_UID (dest); | |
3202 | /* If this is a newly created branch redirection blocking instruction, | |
3203 | we cannot index the branch_uid or insn_addresses arrays with its | |
3204 | uid. But then, we won't need to, because the actual destination is | |
3205 | the following branch. */ | |
3206 | while (dest_uid >= max_uid) | |
3207 | { | |
3208 | dest = NEXT_INSN (dest); | |
3209 | dest_uid = INSN_UID (dest); | |
3210 | } | |
3211 | if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN) | |
3212 | return 0; | |
3213 | return dest_uid; | |
3214 | } | |
3215 | ||
3216 | /* Split condbranches that are out of range. Also add clobbers for | |
3217 | scratch registers that are needed in far jumps. | |
3218 | We do this before delay slot scheduling, so that it can take our | |
3219 | newly created instructions into account. It also allows us to | |
3220 | find branches with common targets more easily. */ | |
3221 | ||
3222 | static void | |
3223 | split_branches (first) | |
3224 | rtx first; | |
3225 | { | |
3226 | rtx insn; | |
3227 | struct far_branch **uid_branch, *far_branch_list = 0; | |
3228 | int max_uid = get_max_uid (); | |
3229 | ||
3230 | /* Find out which branches are out of range. */ | |
1245df60 R |
3231 | shorten_branches (first); |
3232 | ||
3233 | uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch); | |
3234 | bzero ((char *) uid_branch, max_uid * sizeof *uid_branch); | |
3235 | ||
3236 | for (insn = first; insn; insn = NEXT_INSN (insn)) | |
3237 | if (GET_RTX_CLASS (GET_CODE (insn)) != 'i') | |
3238 | continue; | |
3239 | else if (INSN_DELETED_P (insn)) | |
3240 | { | |
3241 | /* Shorten_branches would split this instruction again, | |
3242 | so transform it into a note. */ | |
3243 | PUT_CODE (insn, NOTE); | |
3244 | NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED; | |
3245 | NOTE_SOURCE_FILE (insn) = 0; | |
3246 | } | |
3247 | else if (GET_CODE (insn) == JUMP_INSN | |
3248 | /* Don't mess with ADDR_DIFF_VEC */ | |
3249 | && (GET_CODE (PATTERN (insn)) == SET | |
3250 | || GET_CODE (PATTERN (insn)) == RETURN)) | |
3251 | { | |
3252 | enum attr_type type = get_attr_type (insn); | |
3253 | if (type == TYPE_CBRANCH) | |
3254 | { | |
3255 | rtx next, beyond; | |
3256 | ||
3257 | if (get_attr_length (insn) > 4) | |
3258 | { | |
3259 | rtx src = SET_SRC (PATTERN (insn)); | |
3260 | rtx cond = XEXP (src, 0); | |
3261 | rtx olabel = XEXP (XEXP (src, 1), 0); | |
3262 | rtx jump; | |
3263 | int addr = insn_addresses[INSN_UID (insn)]; | |
3264 | rtx label = 0; | |
3265 | int dest_uid = get_dest_uid (olabel, max_uid); | |
3266 | struct far_branch *bp = uid_branch[dest_uid]; | |
3267 | ||
3268 | /* redirect_jump needs a valid JUMP_LABEL, and it might delete | |
25938114 | 3269 | the label if the LABEL_NUSES count drops to zero. There is |
1245df60 R |
3270 | always a jump_optimize pass that sets these values, but it |
3271 | proceeds to delete unreferenced code, and then if not | |
956d6950 | 3272 | optimizing, to un-delete the deleted instructions, thus |
1245df60 R |
3273 | leaving labels with too low uses counts. */ |
3274 | if (! optimize) | |
3275 | { | |
3276 | JUMP_LABEL (insn) = olabel; | |
3277 | LABEL_NUSES (olabel)++; | |
3278 | } | |
3279 | if (! bp) | |
3280 | { | |
3281 | bp = (struct far_branch *) alloca (sizeof *bp); | |
3282 | uid_branch[dest_uid] = bp; | |
3283 | bp->prev = far_branch_list; | |
3284 | far_branch_list = bp; | |
3285 | bp->far_label | |
3286 | = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0); | |
3287 | LABEL_NUSES (bp->far_label)++; | |
3288 | } | |
3289 | else | |
3290 | { | |
3291 | label = bp->near_label; | |
3292 | if (! label && bp->address - addr >= CONDJUMP_MIN) | |
3293 | { | |
3294 | rtx block = bp->insert_place; | |
3295 | ||
3296 | if (GET_CODE (PATTERN (block)) == RETURN) | |
3297 | block = PREV_INSN (block); | |
3298 | else | |
3299 | block = gen_block_redirect (block, | |
3300 | bp->address, 2); | |
3301 | label = emit_label_after (gen_label_rtx (), | |
3302 | PREV_INSN (block)); | |
3303 | bp->near_label = label; | |
3304 | } | |
3305 | else if (label && ! NEXT_INSN (label)) | |
3306 | if (addr + 2 - bp->address <= CONDJUMP_MAX) | |
3307 | bp->insert_place = insn; | |
3308 | else | |
3309 | gen_far_branch (bp); | |
3310 | } | |
3311 | if (! label | |
3312 | || NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN) | |
3313 | { | |
3314 | bp->near_label = label = gen_label_rtx (); | |
3315 | bp->insert_place = insn; | |
3316 | bp->address = addr; | |
3317 | } | |
3318 | if (! redirect_jump (insn, label)) | |
3319 | abort (); | |
3320 | } | |
3321 | else | |
3322 | { | |
3323 | /* get_attr_length (insn) == 2 */ | |
3324 | /* Check if we have a pattern where reorg wants to redirect | |
3325 | the branch to a label from an unconditional branch that | |
3326 | is too far away. */ | |
3327 | /* We can't use JUMP_LABEL here because it might be undefined | |
3328 | when not optimizing. */ | |
33f7f353 | 3329 | /* A syntax error might cause beyond to be NULL_RTX. */ |
1245df60 R |
3330 | beyond |
3331 | = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), | |
3332 | 0)); | |
3333 | ||
33f7f353 JR |
3334 | if (beyond |
3335 | && (GET_CODE (beyond) == JUMP_INSN | |
3336 | || (GET_CODE (beyond = next_active_insn (beyond)) | |
3337 | == JUMP_INSN)) | |
1245df60 R |
3338 | && GET_CODE (PATTERN (beyond)) == SET |
3339 | && recog_memoized (beyond) == CODE_FOR_jump | |
3340 | && ((insn_addresses[INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0))] | |
3341 | - insn_addresses[INSN_UID (insn)] + 252U) | |
3342 | > 252 + 258 + 2)) | |
3343 | gen_block_redirect (beyond, | |
3344 | insn_addresses[INSN_UID (beyond)], 1); | |
3345 | } | |
3346 | ||
3347 | next = next_active_insn (insn); | |
3348 | ||
3349 | if ((GET_CODE (next) == JUMP_INSN | |
3350 | || GET_CODE (next = next_active_insn (next)) == JUMP_INSN) | |
3351 | && GET_CODE (PATTERN (next)) == SET | |
3352 | && recog_memoized (next) == CODE_FOR_jump | |
3353 | && ((insn_addresses[INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0))] | |
3354 | - insn_addresses[INSN_UID (insn)] + 252U) | |
3355 | > 252 + 258 + 2)) | |
3356 | gen_block_redirect (next, insn_addresses[INSN_UID (next)], 1); | |
3357 | } | |
3358 | else if (type == TYPE_JUMP || type == TYPE_RETURN) | |
3359 | { | |
3360 | int addr = insn_addresses[INSN_UID (insn)]; | |
3361 | rtx far_label = 0; | |
3362 | int dest_uid = 0; | |
3363 | struct far_branch *bp; | |
3364 | ||
3365 | if (type == TYPE_JUMP) | |
3366 | { | |
3367 | far_label = XEXP (SET_SRC (PATTERN (insn)), 0); | |
3368 | dest_uid = get_dest_uid (far_label, max_uid); | |
3369 | if (! dest_uid) | |
3370 | { | |
3371 | /* Parse errors can lead to labels outside | |
3372 | the insn stream. */ | |
3373 | if (! NEXT_INSN (far_label)) | |
3374 | continue; | |
3375 | ||
3376 | if (! optimize) | |
3377 | { | |
3378 | JUMP_LABEL (insn) = far_label; | |
3379 | LABEL_NUSES (far_label)++; | |
3380 | } | |
3381 | redirect_jump (insn, NULL_RTX); | |
3382 | far_label = 0; | |
3383 | } | |
3384 | } | |
3385 | bp = uid_branch[dest_uid]; | |
3386 | if (! bp) | |
3387 | { | |
3388 | bp = (struct far_branch *) alloca (sizeof *bp); | |
3389 | uid_branch[dest_uid] = bp; | |
3390 | bp->prev = far_branch_list; | |
3391 | far_branch_list = bp; | |
3392 | bp->near_label = 0; | |
3393 | bp->far_label = far_label; | |
3394 | if (far_label) | |
3395 | LABEL_NUSES (far_label)++; | |
3396 | } | |
3397 | else if (bp->near_label && ! NEXT_INSN (bp->near_label)) | |
3398 | if (addr - bp->address <= CONDJUMP_MAX) | |
3399 | emit_label_after (bp->near_label, PREV_INSN (insn)); | |
3400 | else | |
3401 | { | |
3402 | gen_far_branch (bp); | |
3403 | bp->near_label = 0; | |
3404 | } | |
3405 | else | |
3406 | bp->near_label = 0; | |
3407 | bp->address = addr; | |
3408 | bp->insert_place = insn; | |
3409 | if (! far_label) | |
3410 | emit_insn_before (gen_block_branch_redirect (const0_rtx), insn); | |
3411 | else | |
3412 | gen_block_redirect (insn, addr, bp->near_label ? 2 : 0); | |
3413 | } | |
3414 | } | |
3415 | /* Generate all pending far branches, | |
3416 | and free our references to the far labels. */ | |
3417 | while (far_branch_list) | |
3418 | { | |
3419 | if (far_branch_list->near_label | |
3420 | && ! NEXT_INSN (far_branch_list->near_label)) | |
3421 | gen_far_branch (far_branch_list); | |
3422 | if (optimize | |
3423 | && far_branch_list->far_label | |
3424 | && ! --LABEL_NUSES (far_branch_list->far_label)) | |
3425 | delete_insn (far_branch_list->far_label); | |
3426 | far_branch_list = far_branch_list->prev; | |
3427 | } | |
a0798779 R |
3428 | |
3429 | /* Instruction length information is no longer valid due to the new | |
3430 | instructions that have been generated. */ | |
3431 | init_insn_lengths (); | |
b9654711 SC |
3432 | } |
3433 | ||
8aa2a305 | 3434 | /* Dump out instruction addresses, which is useful for debugging the |
933c3ba3 JW |
3435 | constant pool table stuff. |
3436 | ||
3437 | If relaxing, output the label and pseudo-ops used to link together | |
3438 | calls and the instruction which set the registers. */ | |
8aa2a305 JW |
3439 | |
3440 | /* ??? This is unnecessary, and probably should be deleted. This makes | |
3441 | the insn_addresses declaration above unnecessary. */ | |
3442 | ||
3443 | /* ??? The addresses printed by this routine for insns are nonsense for | |
3444 | insns which are inside of a sequence where none of the inner insns have | |
3445 | variable length. This is because the second pass of shorten_branches | |
3446 | does not bother to update them. */ | |
0d7e008e | 3447 | |
8e87e161 | 3448 | void |
8aa2a305 JW |
3449 | final_prescan_insn (insn, opvec, noperands) |
3450 | rtx insn; | |
3451 | rtx *opvec; | |
3452 | int noperands; | |
b9654711 | 3453 | { |
8aa2a305 JW |
3454 | if (TARGET_DUMPISIZE) |
3455 | fprintf (asm_out_file, "\n! at %04x\n", insn_addresses[INSN_UID (insn)]); | |
933c3ba3 JW |
3456 | |
3457 | if (TARGET_RELAX) | |
3458 | { | |
3459 | rtx note; | |
3460 | ||
3461 | note = find_reg_note (insn, REG_LABEL, NULL_RTX); | |
3462 | if (note) | |
3463 | { | |
3464 | rtx pattern; | |
3465 | ||
3466 | pattern = PATTERN (insn); | |
3467 | if (GET_CODE (pattern) == PARALLEL) | |
3468 | pattern = XVECEXP (pattern, 0, 0); | |
3469 | if (GET_CODE (pattern) == CALL | |
3470 | || (GET_CODE (pattern) == SET | |
4787bce0 JW |
3471 | && (GET_CODE (SET_SRC (pattern)) == CALL |
3472 | || get_attr_type (insn) == TYPE_SFUNC))) | |
4d7b7169 R |
3473 | asm_fprintf (asm_out_file, "\t.uses %LL%d\n", |
3474 | CODE_LABEL_NUMBER (XEXP (note, 0))); | |
933c3ba3 JW |
3475 | else if (GET_CODE (pattern) == SET) |
3476 | ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", | |
3477 | CODE_LABEL_NUMBER (XEXP (note, 0))); | |
3478 | else | |
3479 | abort (); | |
3480 | } | |
3481 | } | |
0d7e008e | 3482 | } |
b9654711 | 3483 | |
8aa2a305 | 3484 | /* Dump out any constants accumulated in the final pass. These will |
38e01259 | 3485 | only be labels. */ |
b9654711 | 3486 | |
8aa2a305 JW |
3487 | char * |
3488 | output_jump_label_table () | |
0d7e008e | 3489 | { |
8aa2a305 JW |
3490 | int i; |
3491 | ||
3492 | if (pool_size) | |
3493 | { | |
3494 | fprintf (asm_out_file, "\t.align 2\n"); | |
3495 | for (i = 0; i < pool_size; i++) | |
3496 | { | |
3497 | pool_node *p = &pool_vector[i]; | |
3498 | ||
3499 | ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", | |
3500 | CODE_LABEL_NUMBER (p->label)); | |
3501 | output_asm_insn (".long %O0", &p->value); | |
3502 | } | |
3503 | pool_size = 0; | |
3504 | } | |
b9654711 | 3505 | |
8aa2a305 JW |
3506 | return ""; |
3507 | } | |
3508 | \f | |
3509 | /* A full frame looks like: | |
16bea517 JW |
3510 | |
3511 | arg-5 | |
3512 | arg-4 | |
3513 | [ if current_function_anonymous_args | |
3514 | arg-3 | |
3515 | arg-2 | |
3516 | arg-1 | |
3517 | arg-0 ] | |
3518 | saved-fp | |
3519 | saved-r10 | |
3520 | saved-r11 | |
3521 | saved-r12 | |
3522 | saved-pr | |
3523 | local-n | |
3524 | .. | |
3525 | local-1 | |
3526 | local-0 <- fp points here. */ | |
8e87e161 | 3527 | |
8aa2a305 JW |
3528 | /* Number of bytes pushed for anonymous args, used to pass information |
3529 | between expand_prologue and expand_epilogue. */ | |
3530 | ||
/* Reset to 0 by sh_expand_prologue, which then adds 4 for every argument
   register it pushes; sh_expand_epilogue adds the value to its final
   stack adjustment.  */
3531 | static int extra_push; | |
3532 | ||
885c9a39 JR |
3533 | /* Adjust the stack by SIZE bytes. REG holds the rtl of the register |
3534 | to be adjusted, and TEMP, if nonnegative, holds the register number | |
3535 | of a general register that we may clobber. */ | |
8aa2a305 JW |
3536 | |
3537 | static void | |
885c9a39 | 3538 | output_stack_adjust (size, reg, temp) |
8aa2a305 | 3539 | int size; |
46d81ffa | 3540 | rtx reg; |
885c9a39 | 3541 | int temp; |
8aa2a305 JW |
3542 | { |
3543 | if (size) | |
3544 | { | |
f3cd5375 JW |
3545 | if (CONST_OK_FOR_I (size)) |
3546 | emit_insn (gen_addsi3 (reg, reg, GEN_INT (size))); | |
3547 | /* Try to do it with two partial adjustments; however, we must make | |
3548 | sure that the stack is properly aligned at all times, in case | |
3549 | an interrupt occurs between the two partial adjustments. */ | |
3550 | else if (CONST_OK_FOR_I (size / 2 & -4) | |
3551 | && CONST_OK_FOR_I (size - (size / 2 & -4))) | |
3552 | { | |
3553 | emit_insn (gen_addsi3 (reg, reg, GEN_INT (size / 2 & -4))); | |
3554 | emit_insn (gen_addsi3 (reg, reg, GEN_INT (size - (size / 2 & -4)))); | |
3555 | } | |
3556 | else | |
8aa2a305 | 3557 | { |
f3cd5375 JW |
3558 | rtx const_reg; |
3559 | ||
3560 | /* If TEMP is invalid, we could temporarily save a general | |
3561 | register to MACL. However, there is currently no need | |
3562 | to handle this case, so just abort when we see it. */ | |
3563 | if (temp < 0) | |
3564 | abort (); | |
c5c76735 | 3565 | const_reg = gen_rtx_REG (SImode, temp); |
f3cd5375 JW |
3566 | |
3567 | /* If SIZE is negative, subtract the positive value. | |
3568 | This sometimes allows a constant pool entry to be shared | |
3569 | between prologue and epilogue code. */ | |
3570 | if (size < 0) | |
885c9a39 | 3571 | { |
f3cd5375 JW |
3572 | emit_insn (gen_movsi (const_reg, GEN_INT (-size))); |
3573 | emit_insn (gen_subsi3 (reg, reg, const_reg)); | |
885c9a39 JR |
3574 | } |
3575 | else | |
3576 | { | |
f3cd5375 JW |
3577 | emit_insn (gen_movsi (const_reg, GEN_INT (size))); |
3578 | emit_insn (gen_addsi3 (reg, reg, const_reg)); | |
885c9a39 | 3579 | } |
8aa2a305 | 3580 | } |
8aa2a305 JW |
3581 | } |
3582 | } | |
3583 | ||
3584 | /* Output RTL to push register RN onto the stack. */ | |
3585 | ||
3586 | static void | |
3587 | push (rn) | |
3588 | int rn; | |
3589 | { | |
3590 | rtx x; | |
225e4f43 R |
3591 | if (rn == FPUL_REG) |
3592 | x = gen_push_fpul (); | |
3593 | else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE | |
3594 | && rn >= FIRST_FP_REG && rn <= LAST_XD_REG) | |
3595 | { | |
3596 | if ((rn - FIRST_FP_REG) & 1 && rn <= LAST_FP_REG) | |
3597 | return; | |
c5c76735 | 3598 | x = gen_push_4 (gen_rtx_REG (DFmode, rn)); |
225e4f43 R |
3599 | } |
3600 | else if (TARGET_SH3E && rn >= FIRST_FP_REG && rn <= LAST_FP_REG) | |
c5c76735 | 3601 | x = gen_push_e (gen_rtx_REG (SFmode, rn)); |
1a95a963 | 3602 | else |
c5c76735 | 3603 | x = gen_push (gen_rtx_REG (SImode, rn)); |
1a95a963 | 3604 | |
1245df60 | 3605 | x = emit_insn (x); |
c5c76735 JL |
3606 | REG_NOTES (x) |
3607 | = gen_rtx_EXPR_LIST (REG_INC, | |
3608 | gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0); | |
8aa2a305 JW |
3609 | } |
3610 | ||
3611 | /* Output RTL to pop register RN from the stack. */ | |
3612 | ||
3613 | static void | |
3614 | pop (rn) | |
3615 | int rn; | |
3616 | { | |
3617 | rtx x; | |
225e4f43 R |
3618 | if (rn == FPUL_REG) |
3619 | x = gen_pop_fpul (); | |
3620 | else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE | |
3621 | && rn >= FIRST_FP_REG && rn <= LAST_XD_REG) | |
3622 | { | |
3623 | if ((rn - FIRST_FP_REG) & 1 && rn <= LAST_FP_REG) | |
3624 | return; | |
c5c76735 | 3625 | x = gen_pop_4 (gen_rtx_REG (DFmode, rn)); |
225e4f43 R |
3626 | } |
3627 | else if (TARGET_SH3E && rn >= FIRST_FP_REG && rn <= LAST_FP_REG) | |
c5c76735 | 3628 | x = gen_pop_e (gen_rtx_REG (SFmode, rn)); |
1a95a963 | 3629 | else |
c5c76735 | 3630 | x = gen_pop (gen_rtx_REG (SImode, rn)); |
1a95a963 | 3631 | |
1245df60 | 3632 | x = emit_insn (x); |
c5c76735 JL |
3633 | REG_NOTES (x) |
3634 | = gen_rtx_EXPR_LIST (REG_INC, | |
3635 | gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0); | |
8aa2a305 JW |
3636 | } |
3637 | ||
1245df60 | 3638 | /* Generate code to push the regs specified in the mask. */ |
8aa2a305 JW |
3639 | |
3640 | static void | |
1a95a963 JW |
3641 | push_regs (mask, mask2) |
3642 | int mask, mask2; | |
8aa2a305 JW |
3643 | { |
3644 | int i; | |
3645 | ||
1245df60 R |
3646 | /* Push PR last; this gives better latencies after the prologue, and |
3647 | candidates for the return delay slot when there are no general | |
3648 | registers pushed. */ | |
1a95a963 | 3649 | for (i = 0; i < 32; i++) |
1245df60 | 3650 | if (mask & (1 << i) && i != PR_REG) |
8aa2a305 | 3651 | push (i); |
1a95a963 JW |
3652 | for (i = 32; i < FIRST_PSEUDO_REGISTER; i++) |
3653 | if (mask2 & (1 << (i - 32))) | |
3654 | push (i); | |
1245df60 R |
3655 | if (mask & (1 << PR_REG)) |
3656 | push (PR_REG); | |
8aa2a305 JW |
3657 | } |
3658 | ||
3659 | /* Work out the registers which need to be saved, both as a mask and a | |
1245df60 | 3660 | count of saved words. |
8aa2a305 JW |
3661 | |
3662 | If doing a pragma interrupt function, then push all regs used by the | |
3663 | function, and if we call another function (we can tell by looking at PR), | |
3664 | make sure that all the regs it clobbers are safe too. */ | |
3665 | ||
3666 | static int | |
1a95a963 | 3667 | calc_live_regs (count_ptr, live_regs_mask2) |
8aa2a305 | 3668 | int *count_ptr; |
1a95a963 | 3669 | int *live_regs_mask2; |
8aa2a305 JW |
3670 | { |
3671 | int reg; | |
3672 | int live_regs_mask = 0; | |
1245df60 | 3673 | int count; |
157131d7 SC |
3674 | int interrupt_handler; |
3675 | ||
3676 | if ((lookup_attribute | |
3677 | ("interrupt_handler", | |
3678 | DECL_MACHINE_ATTRIBUTES (current_function_decl))) | |
3679 | != NULL_TREE) | |
3680 | interrupt_handler = 1; | |
3681 | else | |
3682 | interrupt_handler = 0; | |
8aa2a305 | 3683 | |
1a95a963 | 3684 | *live_regs_mask2 = 0; |
225e4f43 R |
3685 | /* If we can save a lot of saves by switching to double mode, do that. */ |
3686 | if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE) | |
3687 | for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2) | |
3688 | if (regs_ever_live[reg] && regs_ever_live[reg+1] | |
157131d7 | 3689 | && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa)) |
225e4f43 R |
3690 | && ++count > 2) |
3691 | { | |
3692 | target_flags &= ~FPU_SINGLE_BIT; | |
3693 | break; | |
3694 | } | |
1245df60 | 3695 | for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--) |
8aa2a305 | 3696 | { |
157131d7 | 3697 | if ((interrupt_handler && ! pragma_trapa) |
1245df60 R |
3698 | ? (/* Need to save all the regs ever live. */ |
3699 | (regs_ever_live[reg] | |
3700 | || (call_used_regs[reg] | |
3701 | && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG) | |
3702 | && regs_ever_live[PR_REG])) | |
3703 | && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM | |
3704 | && reg != RETURN_ADDRESS_POINTER_REGNUM | |
225e4f43 | 3705 | && reg != T_REG && reg != GBR_REG && reg != FPSCR_REG) |
1245df60 R |
3706 | : (/* Only push those regs which are used and need to be saved. */ |
3707 | regs_ever_live[reg] && ! call_used_regs[reg])) | |
8aa2a305 | 3708 | { |
1245df60 R |
3709 | if (reg >= 32) |
3710 | *live_regs_mask2 |= 1 << (reg - 32); | |
3711 | else | |
3712 | live_regs_mask |= 1 << reg; | |
3713 | count++; | |
225e4f43 R |
3714 | if (TARGET_SH4 && TARGET_FMOVD && reg >= FIRST_FP_REG) |
3715 | if (reg <= LAST_FP_REG) | |
3716 | { | |
3717 | if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1]) | |
3718 | { | |
3719 | if (reg >= 32) | |
3720 | *live_regs_mask2 |= 1 << ((reg ^ 1) - 32); | |
3721 | else | |
3722 | live_regs_mask |= 1 << (reg ^ 1); | |
3723 | count++; | |
3724 | } | |
3725 | } | |
3726 | else if (reg <= LAST_XD_REG) | |
3727 | { | |
3728 | /* Must switch to double mode to access these registers. */ | |
3729 | target_flags &= ~FPU_SINGLE_BIT; | |
3730 | count++; | |
3731 | } | |
8aa2a305 JW |
3732 | } |
3733 | } | |
3734 | ||
3735 | *count_ptr = count; | |
3736 | return live_regs_mask; | |
3737 | } | |
3738 | ||
3739 | /* Code to generate prologue and epilogue sequences */ | |
b9654711 SC |
3740 | |
3741 | void | |
3742 | sh_expand_prologue () | |
3743 | { | |
3744 | int live_regs_mask; | |
40d2032b | 3745 | int d, i; |
1a95a963 | 3746 | int live_regs_mask2; |
225e4f43 | 3747 | int save_flags = target_flags; |
3d5a0820 | 3748 | int double_align = 0; |
b9654711 | 3749 | |
0d7e008e | 3750 | /* We have pretend args if we had an object sent partially in registers |
8aa2a305 | 3751 | and partially on the stack, e.g. a large structure. */ |
885c9a39 JR |
3752 | output_stack_adjust (-current_function_pretend_args_size, |
3753 | stack_pointer_rtx, 3); | |
b9654711 | 3754 | |
40d2032b JW |
3755 | extra_push = 0; |
3756 | ||
3757 | /* This is set by SETUP_VARARGS to indicate that this is a varargs | |
1a95a963 | 3758 | routine. Clear it here so that the next function isn't affected. */ |
b9654711 SC |
3759 | if (current_function_anonymous_args) |
3760 | { | |
40d2032b JW |
3761 | current_function_anonymous_args = 0; |
3762 | ||
1a95a963 | 3763 | /* This is not used by the SH3E calling convention */ |
9ab70a9b | 3764 | if (! TARGET_SH3E && ! TARGET_HITACHI) |
1245df60 | 3765 | { |
1a95a963 JW |
3766 | /* Push arg regs as if they'd been provided by caller in stack. */ |
3767 | for (i = 0; i < NPARM_REGS(SImode); i++) | |
3768 | { | |
3769 | int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1; | |
aec373f1 R |
3770 | if (i >= (NPARM_REGS(SImode) |
3771 | - current_function_args_info.arg_count[(int) SH_ARG_INT] | |
3772 | )) | |
1a95a963 JW |
3773 | break; |
3774 | push (rn); | |
3775 | extra_push += 4; | |
3776 | } | |
1245df60 | 3777 | } |
b9654711 | 3778 | } |
1a95a963 | 3779 | |
4408efce JL |
3780 | /* If we're supposed to switch stacks at function entry, do so now. */ |
3781 | if (sp_switch) | |
3782 | emit_insn (gen_sp_switch_1 ()); | |
3783 | ||
1245df60 | 3784 | live_regs_mask = calc_live_regs (&d, &live_regs_mask2); |
225e4f43 R |
3785 | /* ??? Maybe we could save some switching if we can move a mode switch |
3786 | that already happens to be at the function start into the prologue. */ | |
3787 | if (target_flags != save_flags) | |
3788 | emit_insn (gen_toggle_sz ()); | |
1a95a963 | 3789 | push_regs (live_regs_mask, live_regs_mask2); |
225e4f43 R |
3790 | if (target_flags != save_flags) |
3791 | emit_insn (gen_toggle_sz ()); | |
1a95a963 | 3792 | |
3d5a0820 R |
3793 | if (TARGET_ALIGN_DOUBLE && d & 1) |
3794 | double_align = 4; | |
3795 | ||
225e4f43 R |
3796 | target_flags = save_flags; |
3797 | ||
3d5a0820 R |
3798 | output_stack_adjust (-get_frame_size () - double_align, |
3799 | stack_pointer_rtx, 3); | |
b9654711 SC |
3800 | |
3801 | if (frame_pointer_needed) | |
8aa2a305 | 3802 | emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx)); |
b9654711 SC |
3803 | } |
3804 | ||
3805 | void | |
3806 | sh_expand_epilogue () | |
3807 | { | |
3808 | int live_regs_mask; | |
40d2032b | 3809 | int d, i; |
b9654711 | 3810 | |
1a95a963 | 3811 | int live_regs_mask2; |
225e4f43 | 3812 | int save_flags = target_flags; |
3d5a0820 R |
3813 | int frame_size = get_frame_size (); |
3814 | ||
3815 | live_regs_mask = calc_live_regs (&d, &live_regs_mask2); | |
3816 | ||
3817 | if (TARGET_ALIGN_DOUBLE && d & 1) | |
3818 | frame_size += 4; | |
16bea517 | 3819 | |
b9654711 | 3820 | if (frame_pointer_needed) |
46d81ffa | 3821 | { |
3d5a0820 | 3822 | output_stack_adjust (frame_size, frame_pointer_rtx, 7); |
07f5b9aa JL |
3823 | |
3824 | /* We must avoid moving the stack pointer adjustment past code | |
3825 | which reads from the local frame, else an interrupt could | |
3826 | occur after the SP adjustment and clobber data in the local | |
3827 | frame. */ | |
3828 | emit_insn (gen_blockage ()); | |
46d81ffa RK |
3829 | emit_insn (gen_movsi (stack_pointer_rtx, frame_pointer_rtx)); |
3830 | } | |
3d5a0820 | 3831 | else if (frame_size) |
07f5b9aa JL |
3832 | { |
3833 | /* We must avoid moving the stack pointer adjustment past code | |
3834 | which reads from the local frame, else an interrupt could | |
3835 | occur after the SP adjustment and clobber data in the local | |
3836 | frame. */ | |
3837 | emit_insn (gen_blockage ()); | |
3d5a0820 | 3838 | output_stack_adjust (frame_size, stack_pointer_rtx, 7); |
07f5b9aa | 3839 | } |
b9654711 | 3840 | |
16bea517 | 3841 | /* Pop all the registers. */ |
0d7e008e | 3842 | |
225e4f43 R |
3843 | if (target_flags != save_flags) |
3844 | emit_insn (gen_toggle_sz ()); | |
1245df60 R |
3845 | if (live_regs_mask & (1 << PR_REG)) |
3846 | pop (PR_REG); | |
b9654711 SC |
3847 | for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
3848 | { | |
3849 | int j = (FIRST_PSEUDO_REGISTER - 1) - i; | |
1245df60 | 3850 | if (j < 32 && (live_regs_mask & (1 << j)) && j != PR_REG) |
1a95a963 JW |
3851 | pop (j); |
3852 | else if (j >= 32 && (live_regs_mask2 & (1 << (j - 32)))) | |
8aa2a305 | 3853 | pop (j); |
b9654711 | 3854 | } |
225e4f43 R |
3855 | if (target_flags != save_flags) |
3856 | emit_insn (gen_toggle_sz ()); | |
3857 | target_flags = save_flags; | |
b9654711 | 3858 | |
46d81ffa | 3859 | output_stack_adjust (extra_push + current_function_pretend_args_size, |
885c9a39 | 3860 | stack_pointer_rtx, 7); |
4408efce JL |
3861 | |
3862 | /* Switch back to the normal stack if necessary. */ | |
3863 | if (sp_switch) | |
3864 | emit_insn (gen_sp_switch_2 ()); | |
b9654711 SC |
3865 | } |
3866 | ||
8aa2a305 JW |
3867 | /* Clear variables at function end. */ |
3868 | ||
3869 | void | |
3870 | function_epilogue (stream, size) | |
3871 | FILE *stream; | |
3872 | int size; | |
3873 | { | |
4408efce JL |
3874 | trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0; |
3875 | sp_switch = NULL_RTX; | |
8aa2a305 JW |
3876 | } |
3877 | ||
1a95a963 | 3878 | rtx |
648d2ffc | 3879 | sh_builtin_saveregs () |
1a95a963 JW |
3880 | { |
3881 | tree fntype = TREE_TYPE (current_function_decl); | |
3882 | /* First unnamed integer register. */ | |
3883 | int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT]; | |
3884 | /* Number of integer registers we need to save. */ | |
3885 | int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg); | |
3886 | /* First unnamed SFmode float reg */ | |
3887 | int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT]; | |
3888 | /* Number of SFmode float regs to save. */ | |
3889 | int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg); | |
3890 | int ptrsize = GET_MODE_SIZE (Pmode); | |
3891 | rtx valist, regbuf, fpregs; | |
3892 | int bufsize, regno; | |
3893 | ||
3894 | /* Allocate block of memory for the regs. */ | |
3895 | /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte? | |
3896 | Or can assign_stack_local accept a 0 SIZE argument? */ | |
3897 | bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD); | |
3898 | ||
3899 | regbuf = assign_stack_local (BLKmode, bufsize, 0); | |
c6df88cb | 3900 | MEM_SET_IN_STRUCT_P (regbuf, 1); |
1a95a963 JW |
3901 | |
3902 | /* Save int args. | |
3903 | This is optimized to only save the regs that are necessary. Explicitly | |
3904 | named args need not be saved. */ | |
3905 | if (n_intregs > 0) | |
3906 | move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg, | |
c5c76735 JL |
3907 | gen_rtx_MEM (BLKmode, |
3908 | plus_constant (XEXP (regbuf, 0), | |
3909 | (n_floatregs | |
3910 | * UNITS_PER_WORD))), | |
1a95a963 JW |
3911 | n_intregs, n_intregs * UNITS_PER_WORD); |
3912 | ||
3913 | /* Save float args. | |
3914 | This is optimized to only save the regs that are necessary. Explicitly | |
3915 | named args need not be saved. | |
3916 | We explicitly build a pointer to the buffer because it halves the insn | |
3917 | count when not optimizing (otherwise the pointer is built for each reg | |
1245df60 R |
3918 | saved). |
3919 | We emit the moves in reverse order so that we can use predecrement. */ | |
1a95a963 JW |
3920 | |
3921 | fpregs = gen_reg_rtx (Pmode); | |
3922 | emit_move_insn (fpregs, XEXP (regbuf, 0)); | |
1245df60 R |
3923 | emit_insn (gen_addsi3 (fpregs, fpregs, |
3924 | GEN_INT (n_floatregs * UNITS_PER_WORD))); | |
225e4f43 R |
3925 | if (TARGET_SH4) |
3926 | { | |
3927 | for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2) | |
3928 | { | |
3929 | emit_insn (gen_addsi3 (fpregs, fpregs, | |
3930 | GEN_INT (-2 * UNITS_PER_WORD))); | |
3931 | emit_move_insn (gen_rtx (MEM, DFmode, fpregs), | |
3932 | gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno)); | |
3933 | } | |
3934 | regno = first_floatreg; | |
3935 | if (regno & 1) | |
3936 | { | |
3937 | emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD))); | |
3938 | emit_move_insn (gen_rtx (MEM, SFmode, fpregs), | |
3939 | gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno | |
3940 | - (TARGET_LITTLE_ENDIAN != 0))); | |
3941 | } | |
3942 | } | |
3943 | else | |
1245df60 R |
3944 | for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--) |
3945 | { | |
3946 | emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD))); | |
c5c76735 JL |
3947 | emit_move_insn (gen_rtx_MEM (SFmode, fpregs), |
3948 | gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno)); | |
1245df60 | 3949 | } |
1a95a963 JW |
3950 | |
3951 | /* Return the address of the regbuf. */ | |
3952 | return XEXP (regbuf, 0); | |
3953 | } | |
3954 | ||
0d7e008e SC |
3955 | /* Define the offset between two registers, one to be eliminated, and |
3956 | the other its replacement, at the start of a routine. */ | |
3957 | ||
3958 | int | |
3959 | initial_elimination_offset (from, to) | |
8e87e161 SC |
3960 | int from; |
3961 | int to; | |
0d7e008e SC |
3962 | { |
3963 | int regs_saved; | |
0d7e008e SC |
3964 | int total_saved_regs_space; |
3965 | int total_auto_space = get_frame_size (); | |
1245df60 | 3966 | int save_flags = target_flags; |
8e87e161 | 3967 | |
2afeea0f JW |
3968 | int live_regs_mask, live_regs_mask2; |
3969 | live_regs_mask = calc_live_regs (®s_saved, &live_regs_mask2); | |
225e4f43 R |
3970 | if (TARGET_ALIGN_DOUBLE && regs_saved & 1) |
3971 | total_auto_space += 4; | |
1245df60 | 3972 | target_flags = save_flags; |
1a95a963 | 3973 | |
0d7e008e | 3974 | total_saved_regs_space = (regs_saved) * 4; |
b9654711 | 3975 | |
0d7e008e | 3976 | if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM) |
8aa2a305 JW |
3977 | return total_saved_regs_space + total_auto_space; |
3978 | ||
0d7e008e | 3979 | if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) |
8aa2a305 JW |
3980 | return total_saved_regs_space + total_auto_space; |
3981 | ||
3982 | /* Initial gap between fp and sp is 0. */ | |
0d7e008e | 3983 | if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) |
8aa2a305 JW |
3984 | return 0; |
3985 | ||
2afeea0f JW |
3986 | if (from == RETURN_ADDRESS_POINTER_REGNUM |
3987 | && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM)) | |
3988 | { | |
1245df60 R |
3989 | int i, n = total_saved_regs_space; |
3990 | for (i = PR_REG-1; i >= 0; i--) | |
2afeea0f | 3991 | if (live_regs_mask & (1 << i)) |
1245df60 | 3992 | n -= 4; |
2afeea0f JW |
3993 | return n + total_auto_space; |
3994 | } | |
3995 | ||
0d7e008e SC |
3996 | abort (); |
3997 | } | |
8aa2a305 | 3998 | \f |
0d7e008e | 3999 | /* Handle machine specific pragmas to be semi-compatible with Hitachi |
16bea517 | 4000 | compiler. */ |
b9654711 SC |
4001 | |
4002 | int | |
f43a85ca NC |
4003 | sh_handle_pragma (p_getc, p_ungetc, pname) |
4004 | int (* p_getc) PROTO((void)); | |
4005 | void (* p_ungetc) PROTO((int)); | |
4006 | char * pname; | |
b9654711 | 4007 | { |
119d0c36 | 4008 | int retval = 0; |
05a81fe5 | 4009 | |
119d0c36 JW |
4010 | if (strcmp (pname, "interrupt") == 0) |
4011 | pragma_interrupt = retval = 1; | |
4012 | else if (strcmp (pname, "trapa") == 0) | |
4013 | pragma_interrupt = pragma_trapa = retval = 1; | |
4014 | else if (strcmp (pname, "nosave_low_regs") == 0) | |
4015 | pragma_nosave_low_regs = retval = 1; | |
05a81fe5 | 4016 | |
119d0c36 | 4017 | return retval; |
0d7e008e | 4018 | } |
157131d7 SC |
4019 | |
4020 | /* Generate 'handle_interrupt' attribute for decls */ | |
4021 | ||
4022 | void | |
4023 | sh_pragma_insert_attributes (node, attributes, prefix) | |
4024 | tree node; | |
4025 | tree * attributes; | |
4026 | tree * prefix; | |
4027 | { | |
4028 | tree a; | |
4029 | ||
4030 | if (! pragma_interrupt | |
4031 | || TREE_CODE (node) != FUNCTION_DECL) | |
4032 | return; | |
4033 | ||
4034 | /* We are only interested in fields. */ | |
4035 | if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd') | |
4036 | return; | |
4037 | ||
4038 | /* Add a 'handle_interrupt' attribute. */ | |
4039 | * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes); | |
4040 | ||
4041 | return; | |
4042 | } | |
4043 | ||
4408efce JL |
4044 | /* Return nonzero if ATTR is a valid attribute for DECL. |
4045 | ATTRIBUTES are any existing attributes and ARGS are the arguments | |
4046 | supplied with ATTR. | |
4047 | ||
4048 | Supported attributes: | |
4049 | ||
4050 | interrupt_handler -- specifies this function is an interrupt handler. | |
4051 | ||
4052 | sp_switch -- specifies an alternate stack for an interrupt handler | |
4053 | to run on. | |
4054 | ||
956d6950 | 4055 | trap_exit -- use a trapa to exit an interrupt function instead of |
4408efce JL |
4056 | an rte instruction. */ |
4057 | ||
4058 | int | |
4059 | sh_valid_machine_decl_attribute (decl, attributes, attr, args) | |
4060 | tree decl; | |
4061 | tree attributes; | |
4062 | tree attr; | |
4063 | tree args; | |
4064 | { | |
4065 | int retval = 0; | |
4066 | ||
4067 | if (TREE_CODE (decl) != FUNCTION_DECL) | |
4068 | return 0; | |
4069 | ||
4070 | if (is_attribute_p ("interrupt_handler", attr)) | |
4071 | { | |
4408efce JL |
4072 | return 1; |
4073 | } | |
4074 | ||
4075 | if (is_attribute_p ("sp_switch", attr)) | |
4076 | { | |
4077 | /* The sp_switch attribute only has meaning for interrupt functions. */ | |
4078 | if (!pragma_interrupt) | |
4079 | return 0; | |
4080 | ||
4081 | /* sp_switch must have an argument. */ | |
4082 | if (!args || TREE_CODE (args) != TREE_LIST) | |
4083 | return 0; | |
4084 | ||
4085 | /* The argument must be a constant string. */ | |
4086 | if (TREE_CODE (TREE_VALUE (args)) != STRING_CST) | |
4087 | return 0; | |
4088 | ||
c5c76735 JL |
4089 | sp_switch = gen_rtx_SYMBOL_REF (VOIDmode, |
4090 | TREE_STRING_POINTER (TREE_VALUE (args))); | |
4408efce JL |
4091 | return 1; |
4092 | } | |
4093 | ||
4094 | if (is_attribute_p ("trap_exit", attr)) | |
4095 | { | |
4096 | /* The trap_exit attribute only has meaning for interrupt functions. */ | |
4097 | if (!pragma_interrupt) | |
4098 | return 0; | |
4099 | ||
4100 | /* trap_exit must have an argument. */ | |
4101 | if (!args || TREE_CODE (args) != TREE_LIST) | |
4102 | return 0; | |
4103 | ||
4104 | /* The argument must be a constant integer. */ | |
4105 | if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST) | |
4106 | return 0; | |
4107 | ||
4108 | trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args)); | |
4109 | return 1; | |
4110 | } | |
4111 | } | |
4112 | ||
0d7e008e | 4113 | \f |
8aa2a305 | 4114 | /* Predicates used by the templates. */ |
0d7e008e | 4115 | |
8aa2a305 JW |
4116 | /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx. |
4117 | Used only in general_movsrc_operand. */ | |
0d7e008e | 4118 | |
8aa2a305 JW |
4119 | int |
4120 | system_reg_operand (op, mode) | |
4121 | rtx op; | |
4122 | enum machine_mode mode; | |
0d7e008e | 4123 | { |
8aa2a305 | 4124 | switch (REGNO (op)) |
0d7e008e | 4125 | { |
8aa2a305 JW |
4126 | case PR_REG: |
4127 | case MACL_REG: | |
4128 | case MACH_REG: | |
4129 | return 1; | |
0d7e008e | 4130 | } |
8aa2a305 | 4131 | return 0; |
0d7e008e | 4132 | } |
0d7e008e SC |
4133 | |
4134 | /* Returns 1 if OP can be source of a simple move operation. | |
4135 | Same as general_operand, but a LABEL_REF is valid, PRE_DEC is | |
16bea517 | 4136 | invalid as are subregs of system registers. */ |
0d7e008e SC |
4137 | |
4138 | int | |
4139 | general_movsrc_operand (op, mode) | |
4140 | rtx op; | |
4141 | enum machine_mode mode; | |
4142 | { | |
06c386ea SC |
4143 | if (GET_CODE (op) == MEM) |
4144 | { | |
4145 | rtx inside = XEXP (op, 0); | |
4146 | if (GET_CODE (inside) == CONST) | |
4147 | inside = XEXP (inside, 0); | |
0d7e008e | 4148 | |
06c386ea SC |
4149 | if (GET_CODE (inside) == LABEL_REF) |
4150 | return 1; | |
8e87e161 | 4151 | |
06c386ea | 4152 | if (GET_CODE (inside) == PLUS |
8aa2a305 JW |
4153 | && GET_CODE (XEXP (inside, 0)) == LABEL_REF |
4154 | && GET_CODE (XEXP (inside, 1)) == CONST_INT) | |
06c386ea | 4155 | return 1; |
16bea517 JW |
4156 | |
4157 | /* Only post inc allowed. */ | |
97f8690b | 4158 | if (GET_CODE (inside) == PRE_DEC) |
06c386ea | 4159 | return 0; |
06c386ea | 4160 | } |
0d7e008e SC |
4161 | |
4162 | if ((mode == QImode || mode == HImode) | |
4163 | && (GET_CODE (op) == SUBREG | |
4164 | && GET_CODE (XEXP (op, 0)) == REG | |
4165 | && system_reg_operand (XEXP (op, 0), mode))) | |
4166 | return 0; | |
4167 | ||
0d7e008e | 4168 | return general_operand (op, mode); |
b9654711 SC |
4169 | } |
4170 | ||
0d7e008e SC |
4171 | /* Returns 1 if OP can be a destination of a move. |
4172 | Same as general_operand, but no post-increment allowed. */ | |
4173 | ||
b9654711 | 4174 | int |
0d7e008e SC |
4175 | general_movdst_operand (op, mode) |
4176 | rtx op; | |
4177 | enum machine_mode mode; | |
b9654711 | 4178 | { |
16bea517 | 4179 | /* Only pre dec allowed. */ |
97f8690b | 4180 | if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC) |
0d7e008e | 4181 | return 0; |
d3ae8277 | 4182 | |
0d7e008e SC |
4183 | return general_operand (op, mode); |
4184 | } | |
4185 | ||
0d7e008e SC |
4186 | /* Returns 1 if OP is a normal arithmetic register. */ |
4187 | ||
4188 | int | |
4189 | arith_reg_operand (op, mode) | |
4190 | rtx op; | |
4191 | enum machine_mode mode; | |
4192 | { | |
4193 | if (register_operand (op, mode)) | |
4194 | { | |
519164a9 JW |
4195 | int regno; |
4196 | ||
0d7e008e | 4197 | if (GET_CODE (op) == REG) |
519164a9 JW |
4198 | regno = REGNO (op); |
4199 | else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG) | |
4200 | regno = REGNO (SUBREG_REG (op)); | |
4201 | else | |
4202 | return 1; | |
4203 | ||
225e4f43 R |
4204 | return (regno != T_REG && regno != PR_REG |
4205 | && (regno != FPUL_REG || TARGET_SH4) | |
4206 | && regno != MACH_REG && regno != MACL_REG); | |
4207 | } | |
4208 | return 0; | |
4209 | } | |
4210 | ||
4211 | int | |
4212 | fp_arith_reg_operand (op, mode) | |
4213 | rtx op; | |
4214 | enum machine_mode mode; | |
4215 | { | |
4216 | if (register_operand (op, mode)) | |
4217 | { | |
4218 | int regno; | |
4219 | ||
4220 | if (GET_CODE (op) == REG) | |
4221 | regno = REGNO (op); | |
4222 | else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG) | |
4223 | regno = REGNO (SUBREG_REG (op)); | |
4224 | else | |
4225 | return 1; | |
4226 | ||
66c0b347 R |
4227 | return (regno >= FIRST_PSEUDO_REGISTER |
4228 | || (regno >= FIRST_FP_REG && regno <= LAST_FP_REG)); | |
0d7e008e SC |
4229 | } |
4230 | return 0; | |
4231 | } | |
4232 | ||
225e4f43 R |
4233 | int |
4234 | fp_extended_operand (op, mode) | |
4235 | rtx op; | |
4236 | enum machine_mode mode; | |
4237 | { | |
4238 | if (GET_CODE (op) == FLOAT_EXTEND && GET_MODE (op) == mode) | |
4239 | { | |
4240 | op = XEXP (op, 0); | |
4241 | mode = GET_MODE (op); | |
4242 | } | |
66c0b347 R |
4243 | if (register_operand (op, mode)) |
4244 | { | |
4245 | int regno; | |
4246 | ||
4247 | if (GET_CODE (op) == REG) | |
4248 | regno = REGNO (op); | |
4249 | else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG) | |
4250 | regno = REGNO (SUBREG_REG (op)); | |
4251 | else | |
4252 | return 1; | |
4253 | ||
4254 | return (regno != T_REG && regno != PR_REG && regno > 15 | |
4255 | && regno != MACH_REG && regno != MACL_REG); | |
4256 | } | |
4257 | return 0; | |
225e4f43 R |
4258 | } |
4259 | ||
0d7e008e SC |
4260 | /* Returns 1 if OP is a valid source operand for an arithmetic insn. */ |
4261 | ||
4262 | int | |
4263 | arith_operand (op, mode) | |
4264 | rtx op; | |
4265 | enum machine_mode mode; | |
4266 | { | |
4267 | if (arith_reg_operand (op, mode)) | |
4268 | return 1; | |
4269 | ||
8aa2a305 JW |
4270 | if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op))) |
4271 | return 1; | |
4272 | ||
0d7e008e SC |
4273 | return 0; |
4274 | } | |
4275 | ||
22e1ebf1 JW |
4276 | /* Returns 1 if OP is a valid source operand for a compare insn. */ |
4277 | ||
4278 | int | |
4279 | arith_reg_or_0_operand (op, mode) | |
4280 | rtx op; | |
4281 | enum machine_mode mode; | |
4282 | { | |
4283 | if (arith_reg_operand (op, mode)) | |
4284 | return 1; | |
4285 | ||
8aa2a305 JW |
4286 | if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_N (INTVAL (op))) |
4287 | return 1; | |
4288 | ||
22e1ebf1 JW |
4289 | return 0; |
4290 | } | |
4291 | ||
16bea517 | 4292 | /* Returns 1 if OP is a valid source operand for a logical operation. */ |
0d7e008e SC |
4293 | |
4294 | int | |
4295 | logical_operand (op, mode) | |
4296 | rtx op; | |
4297 | enum machine_mode mode; | |
4298 | { | |
4299 | if (arith_reg_operand (op, mode)) | |
4300 | return 1; | |
4301 | ||
8aa2a305 JW |
4302 | if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op))) |
4303 | return 1; | |
4304 | ||
0d7e008e | 4305 | return 0; |
b9654711 | 4306 | } |
d3ae8277 | 4307 | |
1a95a963 JW |
4308 | /* Nonzero if OP is a floating point value with value 0.0. */ |
4309 | ||
4310 | int | |
4311 | fp_zero_operand (op) | |
4312 | rtx op; | |
d3ae8277 | 4313 | { |
1a95a963 | 4314 | REAL_VALUE_TYPE r; |
d3ae8277 | 4315 | |
1a95a963 JW |
4316 | if (GET_MODE (op) != SFmode) |
4317 | return 0; | |
4318 | ||
4319 | REAL_VALUE_FROM_CONST_DOUBLE (r, op); | |
e4fa6b06 | 4320 | return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r); |
d3ae8277 SC |
4321 | } |
4322 | ||
1a95a963 | 4323 | /* Nonzero if OP is a floating point value with value 1.0. */ |
d3ae8277 SC |
4324 | |
4325 | int | |
1a95a963 JW |
4326 | fp_one_operand (op) |
4327 | rtx op; | |
d3ae8277 | 4328 | { |
1a95a963 JW |
4329 | REAL_VALUE_TYPE r; |
4330 | ||
4331 | if (GET_MODE (op) != SFmode) | |
4332 | return 0; | |
4333 | ||
4334 | REAL_VALUE_FROM_CONST_DOUBLE (r, op); | |
4335 | return REAL_VALUES_EQUAL (r, dconst1); | |
d3ae8277 | 4336 | } |
1245df60 | 4337 | |
225e4f43 R |
4338 | int |
4339 | tertiary_reload_operand (op, mode) | |
4340 | rtx op; | |
4341 | enum machine_mode mode; | |
4342 | { | |
4343 | enum rtx_code code = GET_CODE (op); | |
4344 | return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE); | |
4345 | } | |
4346 | ||
4347 | int | |
4348 | fpscr_operand (op) | |
4349 | rtx op; | |
4350 | { | |
4351 | return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG | |
4352 | && GET_MODE (op) == PSImode); | |
4353 | } | |
4354 | ||
4355 | int | |
4356 | commutative_float_operator (op, mode) | |
4357 | rtx op; | |
4358 | enum machine_mode mode; | |
4359 | { | |
4360 | if (GET_MODE (op) != mode) | |
4361 | return 0; | |
4362 | switch (GET_CODE (op)) | |
4363 | { | |
4364 | case PLUS: | |
4365 | case MULT: | |
4366 | return 1; | |
4367 | } | |
4368 | return 0; | |
4369 | } | |
4370 | ||
4371 | int | |
4372 | noncommutative_float_operator (op, mode) | |
4373 | rtx op; | |
4374 | enum machine_mode mode; | |
4375 | { | |
4376 | if (GET_MODE (op) != mode) | |
4377 | return 0; | |
4378 | switch (GET_CODE (op)) | |
4379 | { | |
4380 | case MINUS: | |
4381 | case DIV: | |
4382 | return 1; | |
4383 | } | |
4384 | return 0; | |
4385 | } | |
4386 | ||
4387 | int | |
4388 | binary_float_operator (op, mode) | |
4389 | rtx op; | |
4390 | enum machine_mode mode; | |
4391 | { | |
4392 | if (GET_MODE (op) != mode) | |
4393 | return 0; | |
4394 | switch (GET_CODE (op)) | |
4395 | { | |
4396 | case PLUS: | |
4397 | case MINUS: | |
4398 | case MULT: | |
4399 | case DIV: | |
4400 | return 1; | |
4401 | } | |
4402 | return 0; | |
4403 | } | |
1245df60 | 4404 | \f |
33f7f353 | 4405 | /* Return the destination address of a branch. */ |
1245df60 R |
4406 | |
4407 | int | |
33f7f353 | 4408 | branch_dest (branch) |
1245df60 R |
4409 | rtx branch; |
4410 | { | |
33f7f353 JR |
4411 | rtx dest = SET_SRC (PATTERN (branch)); |
4412 | int dest_uid; | |
1245df60 R |
4413 | |
4414 | if (GET_CODE (dest) == IF_THEN_ELSE) | |
4415 | dest = XEXP (dest, 1); | |
4416 | dest = XEXP (dest, 0); | |
4417 | dest_uid = INSN_UID (dest); | |
33f7f353 | 4418 | return insn_addresses[dest_uid]; |
1245df60 | 4419 | } |
a55e9d2b RK |
4420 | \f |
4421 | /* Return non-zero if REG is not used after INSN. | |
4422 | We assume REG is a reload reg, and therefore does | |
8b760293 | 4423 | not live past labels. It may live past calls or jumps though. */ |
a55e9d2b RK |
int
reg_unused_after (reg, insn)
     rtx reg;
     rtx insn;
{
  enum rtx_code code;
  rtx set;

  /* If the reg is set by this instruction, then it is safe for our
     case.  Disregard the case where this is a store to memory, since
     we are checking a register used in the store address.  */
  set = single_set (insn);
  if (set && GET_CODE (SET_DEST (set)) != MEM
      && reg_overlap_mentioned_p (reg, SET_DEST (set)))
    return 1;

  /* Scan forward through the insns following INSN until a verdict is
     reached or the insn chain ends.  */
  while (insn = NEXT_INSN (insn))
    {
      code = GET_CODE (insn);

#if 0
      /* If this is a label that existed before reload, then the register
         is dead here.  However, if this is a label added by reorg, then
         the register may still be live here.  We can't tell the difference,
         so we just ignore labels completely.  */
      if (code == CODE_LABEL)
        return 1;
      /* else */
#endif

      /* A plain jump ends the scan with the answer "may be used".  */
      if (code == JUMP_INSN)
        return 0;

      /* If this is a sequence, we must handle them all at once.
         We could have for instance a call that sets the target register,
         and a insn in a delay slot that uses the register.  In this case,
         we must return 0.  */
      else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
        {
          int i;
          /* Becomes 1 once some insn of the sequence sets REG (other than
             as part of a memory destination).  */
          int retval = 0;

          for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
            {
              rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
              rtx set = single_set (this_insn);

              /* Remember in CODE that the sequence contains a call or a
                 jump; this is tested after the loop and at the bottom of
                 the outer loop.  */
              if (GET_CODE (this_insn) == CALL_INSN)
                code = CALL_INSN;
              else if (GET_CODE (this_insn) == JUMP_INSN)
                {
                  if (INSN_ANNULLED_BRANCH_P (this_insn))
                    return 0;
                  code = JUMP_INSN;
                }

              /* Any read of REG inside the sequence means it is used.  */
              if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
                return 0;
              if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
                {
                  /* REG mentioned in a MEM destination is a use (address),
                     anything else is a set of REG.  */
                  if (GET_CODE (SET_DEST (set)) != MEM)
                    retval = 1;
                  else
                    return 0;
                }
              /* Not a single set: be conservative if REG appears at all.  */
              if (set == 0
                  && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
                return 0;
            }
          if (retval == 1)
            return 1;
          else if (code == JUMP_INSN)
            return 0;
        }
      else if (GET_RTX_CLASS (code) == 'i')
        {
          rtx set = single_set (insn);

          /* Same tests as for the members of a sequence above, applied
             to a single insn.  */
          if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
            return 0;
          if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
            return GET_CODE (SET_DEST (set)) != MEM;
          if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
            return 0;
        }

      /* Reached for a call (or a sequence containing one) that did not
         mention REG: if REG is a call-used register, report it unused.  */
      if (code == CALL_INSN && call_used_regs[REGNO (reg)])
        return 1;
    }
  /* Ran off the end of the insn chain without finding a use.  */
  return 1;
}
225e4f43 R |
4515 | \f |
4516 | extern struct obstack permanent_obstack; | |
4517 | ||
4518 | rtx | |
4519 | get_fpscr_rtx () | |
4520 | { | |
4521 | static rtx fpscr_rtx; | |
4522 | ||
4523 | if (! fpscr_rtx) | |
4524 | { | |
4525 | push_obstacks (&permanent_obstack, &permanent_obstack); | |
4526 | fpscr_rtx = gen_rtx (REG, PSImode, 48); | |
4527 | REG_USERVAR_P (fpscr_rtx) = 1; | |
4528 | pop_obstacks (); | |
4529 | mark_user_reg (fpscr_rtx); | |
4530 | } | |
4531 | if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG) | |
4532 | mark_user_reg (fpscr_rtx); | |
4533 | return fpscr_rtx; | |
4534 | } | |
4535 | ||
4536 | void | |
4537 | emit_sf_insn (pat) | |
4538 | rtx pat; | |
4539 | { | |
4540 | rtx addr; | |
4541 | /* When generating reload insns, we must not create new registers. FPSCR | |
4542 | should already have the correct value, so do nothing to change it. */ | |
4543 | if (! TARGET_FPU_SINGLE && ! reload_in_progress) | |
4544 | { | |
4545 | addr = gen_reg_rtx (SImode); | |
4546 | emit_insn (gen_fpu_switch0 (addr)); | |
4547 | } | |
4548 | emit_insn (pat); | |
4549 | if (! TARGET_FPU_SINGLE && ! reload_in_progress) | |
4550 | { | |
4551 | addr = gen_reg_rtx (SImode); | |
4552 | emit_insn (gen_fpu_switch1 (addr)); | |
4553 | } | |
4554 | } | |
4555 | ||
4556 | void | |
4557 | emit_df_insn (pat) | |
4558 | rtx pat; | |
4559 | { | |
4560 | rtx addr; | |
4561 | if (TARGET_FPU_SINGLE && ! reload_in_progress) | |
4562 | { | |
4563 | addr = gen_reg_rtx (SImode); | |
4564 | emit_insn (gen_fpu_switch0 (addr)); | |
4565 | } | |
4566 | emit_insn (pat); | |
4567 | if (TARGET_FPU_SINGLE && ! reload_in_progress) | |
4568 | { | |
4569 | addr = gen_reg_rtx (SImode); | |
4570 | emit_insn (gen_fpu_switch1 (addr)); | |
4571 | } | |
4572 | } | |
4573 | ||
4574 | void | |
4575 | expand_sf_unop (fun, operands) | |
4576 | rtx (*fun)(); | |
4577 | rtx *operands; | |
4578 | { | |
4579 | emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ())); | |
4580 | } | |
4581 | ||
4582 | void | |
4583 | expand_sf_binop (fun, operands) | |
4584 | rtx (*fun)(); | |
4585 | rtx *operands; | |
4586 | { | |
4587 | emit_sf_insn ((*fun) (operands[0], operands[1], operands[2], | |
4588 | get_fpscr_rtx ())); | |
4589 | } | |
4590 | ||
4591 | void | |
4592 | expand_df_unop (fun, operands) | |
4593 | rtx (*fun)(); | |
4594 | rtx *operands; | |
4595 | { | |
4596 | emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ())); | |
4597 | } | |
4598 | ||
4599 | void | |
4600 | expand_df_binop (fun, operands) | |
4601 | rtx (*fun)(); | |
4602 | rtx *operands; | |
4603 | { | |
4604 | emit_df_insn ((*fun) (operands[0], operands[1], operands[2], | |
4605 | get_fpscr_rtx ())); | |
4606 | } | |
4607 | ||
4608 | void | |
4609 | expand_fp_branch (compare, branch) | |
4610 | rtx (*compare) (), (*branch) (); | |
4611 | { | |
4612 | (GET_MODE (sh_compare_op0) == SFmode ? emit_sf_insn : emit_df_insn) | |
4613 | ((*compare) ()); | |
4614 | emit_jump_insn ((*branch) ()); | |
4615 | } | |
4616 | \f | |
4617 | /* We don't want to make fpscr call-saved, because that would prevent | |
4618 | changing it, and it would also cost an extra instruction to save it. | |
4619 | We don't want it to be known as a global register either, because | |
4620 | that disables all flow analysis. But it has to be live at the function | |
4621 | return. Thus, we need to insert a USE at the end of the function. */ | |
4622 | /* This should best be called at about the time FINALIZE_PIC is called, | |
4623 | but not dependent on flag_pic. Alas, there is no suitable hook there, | |
4624 | so this gets called from HAVE_RETURN. */ | |
4625 | int | |
4626 | emit_fpscr_use () | |
4627 | { | |
4628 | static int fpscr_uses = 0; | |
4629 | ||
4630 | if (rtx_equal_function_value_matters) | |
4631 | { | |
4632 | emit_insn (gen_rtx (USE, VOIDmode, get_fpscr_rtx ())); | |
4633 | fpscr_uses++; | |
4634 | } | |
4635 | else | |
4636 | { | |
4637 | if (fpscr_uses > 1) | |
4638 | { | |
4639 | /* Due to he crude way we emit the USEs, we might end up with | |
4640 | some extra ones. Delete all but the last one. */ | |
4641 | rtx insn; | |
4642 | ||
4643 | for (insn = get_last_insn(); insn; insn = PREV_INSN (insn)) | |
4644 | if (GET_CODE (insn) == INSN | |
4645 | && GET_CODE (PATTERN (insn)) == USE | |
4646 | && GET_CODE (XEXP (PATTERN (insn), 0)) == REG | |
4647 | && REGNO (XEXP (PATTERN (insn), 0)) == FPSCR_REG) | |
4648 | { | |
4649 | insn = PREV_INSN (insn); | |
4650 | break; | |
4651 | } | |
4652 | for (; insn; insn = PREV_INSN (insn)) | |
4653 | if (GET_CODE (insn) == INSN | |
4654 | && GET_CODE (PATTERN (insn)) == USE | |
4655 | && GET_CODE (XEXP (PATTERN (insn), 0)) == REG | |
4656 | && REGNO (XEXP (PATTERN (insn), 0)) == FPSCR_REG) | |
4657 | { | |
4658 | PUT_CODE (insn, NOTE); | |
4659 | NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED; | |
4660 | NOTE_SOURCE_FILE (insn) = 0; | |
4661 | } | |
4662 | } | |
4663 | fpscr_uses = 0; | |
4664 | } | |
4665 | } | |
4666 | \f | |
4667 | /* ??? gcc does flow analysis strictly after common subexpression | |
4668 | elimination. As a result, common subexpression elimination fails | |
4669 | when there are some intervening statements setting the same register. | |
4670 | If we did nothing about this, this would hurt the precision switching | |
4671 | for SH4 badly. There is some cse after reload, but it is unable to | |
4672 | undo the extra register pressure from the unused instructions, and | |
4673 | it cannot remove auto-increment loads. | |
4674 | ||
4675 | A C code example that shows this flow/cse weakness for (at least) SH | |
4676 | and sparc (as of gcc ss-970706) is this: | |
4677 | ||
4678 | double | |
4679 | f(double a) | |
4680 | { | |
4681 | double d; | |
4682 | d = 0.1; | |
4683 | a += d; | |
4684 | d = 1.1; | |
4685 | d = 0.1; | |
4686 | a *= d; | |
4687 | return a; | |
4688 | } | |
4689 | ||
4690 | So we add another pass before common subexpression elimination, to | |
4691 | remove assignments that are dead due to a following assignment in the | |
4692 | same basic block. */ | |
4693 | ||
/* Nonzero requests the dead-assignment removal done by
   remove_dead_before_cse.  That function does nothing while this is zero
   and clears it once it has run.  Where it is set to nonzero is not
   visible in this part of the file.  */
int sh_flag_remove_dead_before_cse;
4695 | ||
4696 | static void | |
4697 | mark_use (x, reg_set_block) | |
4698 | rtx x, *reg_set_block; | |
4699 | { | |
4700 | enum rtx_code code; | |
4701 | ||
4702 | if (! x) | |
4703 | return; | |
4704 | code = GET_CODE (x); | |
4705 | switch (code) | |
4706 | { | |
4707 | case REG: | |
4708 | { | |
4709 | int regno = REGNO (x); | |
4710 | int nregs = (regno < FIRST_PSEUDO_REGISTER | |
4711 | ? HARD_REGNO_NREGS (regno, GET_MODE (x)) | |
4712 | : 1); | |
4713 | do | |
4714 | { | |
4715 | reg_set_block[regno + nregs - 1] = 0; | |
4716 | } | |
4717 | while (--nregs); | |
4718 | break; | |
4719 | } | |
4720 | case SET: | |
4721 | { | |
4722 | rtx dest = SET_DEST (x); | |
4723 | ||
4724 | if (GET_CODE (dest) == SUBREG) | |
4725 | dest = SUBREG_REG (dest); | |
4726 | if (GET_CODE (dest) != REG) | |
4727 | mark_use (dest, reg_set_block); | |
4728 | mark_use (SET_SRC (x), reg_set_block); | |
4729 | break; | |
4730 | } | |
4731 | case CLOBBER: | |
4732 | break; | |
4733 | default: | |
4734 | { | |
6f7d635c | 4735 | const char *fmt = GET_RTX_FORMAT (code); |
225e4f43 R |
4736 | int i, j; |
4737 | for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) | |
4738 | { | |
4739 | if (fmt[i] == 'e') | |
4740 | mark_use (XEXP (x, i), reg_set_block); | |
4741 | else if (fmt[i] == 'E') | |
4742 | for (j = XVECLEN (x, i) - 1; j >= 0; j--) | |
4743 | mark_use (XVECEXP (x, i, j), reg_set_block); | |
4744 | } | |
4745 | break; | |
4746 | } | |
4747 | } | |
4748 | } | |
4749 | ||
4750 | int | |
4751 | remove_dead_before_cse () | |
4752 | { | |
4753 | rtx *reg_set_block, last, last_call, insn, set; | |
4754 | int in_libcall = 0; | |
4755 | ||
4756 | /* This pass should run just once, after rtl generation. */ | |
4757 | ||
4758 | if (! sh_flag_remove_dead_before_cse | |
4759 | || rtx_equal_function_value_matters | |
4760 | || reload_completed) | |
4761 | return; | |
4762 | ||
4763 | sh_flag_remove_dead_before_cse = 0; | |
4764 | ||
4765 | reg_set_block = (rtx *)alloca (max_reg_num () * sizeof (rtx)); | |
4766 | bzero ((char *)reg_set_block, max_reg_num () * sizeof (rtx)); | |
4767 | last_call = last = get_last_insn (); | |
4768 | for (insn = last; insn; insn = PREV_INSN (insn)) | |
4769 | { | |
4770 | if (GET_RTX_CLASS (GET_CODE (insn)) != 'i') | |
4771 | continue; | |
4772 | if (GET_CODE (insn) == JUMP_INSN) | |
4773 | { | |
4774 | last_call = last = insn; | |
4775 | continue; | |
4776 | } | |
4777 | set = single_set (insn); | |
4778 | ||
4779 | /* Don't delete parts of libcalls, since that would confuse cse, loop | |
4780 | and flow. */ | |
4781 | if (find_reg_note (insn, REG_RETVAL, NULL_RTX)) | |
4782 | in_libcall = 1; | |
4783 | else if (in_libcall) | |
4784 | { | |
4785 | if (find_reg_note (insn, REG_LIBCALL, NULL_RTX)) | |
4786 | in_libcall = 0; | |
4787 | } | |
4788 | else if (set && GET_CODE (SET_DEST (set)) == REG) | |
4789 | { | |
4790 | int regno = REGNO (SET_DEST (set)); | |
4791 | rtx ref_insn = (regno < FIRST_PSEUDO_REGISTER && call_used_regs[regno] | |
4792 | ? last_call | |
4793 | : last); | |
4794 | if (reg_set_block[regno] == ref_insn | |
4795 | && (regno >= FIRST_PSEUDO_REGISTER | |
4796 | || HARD_REGNO_NREGS (regno, GET_MODE (SET_DEST (set))) == 1) | |
4797 | && (GET_CODE (insn) != CALL_INSN || CONST_CALL_P (insn))) | |
4798 | { | |
4799 | PUT_CODE (insn, NOTE); | |
4800 | NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED; | |
4801 | NOTE_SOURCE_FILE (insn) = 0; | |
4802 | continue; | |
4803 | } | |
4804 | else | |
4805 | reg_set_block[REGNO (SET_DEST (set))] = ref_insn; | |
4806 | } | |
4807 | if (GET_CODE (insn) == CALL_INSN) | |
4808 | { | |
4809 | last_call = insn; | |
4810 | mark_use (CALL_INSN_FUNCTION_USAGE (insn), reg_set_block); | |
4811 | } | |
4812 | mark_use (PATTERN (insn), reg_set_block); | |
4813 | } | |
4814 | return 0; | |
4815 | } |