]>
Commit | Line | Data |
---|---|---|
ed8d2920 | 1 | /* Graph coloring register allocator |
e146f815 | 2 | Copyright (C) 2001, 2002, 2003, 2004 Free Software Foundation, Inc. |
ed8d2920 MM |
3 | Contributed by Michael Matz <matz@suse.de> |
4 | and Daniel Berlin <dan@cgsoftware.com>. | |
5 | ||
6 | This file is part of GCC. | |
7 | ||
8 | GCC is free software; you can redistribute it and/or modify it under the | |
9 | terms of the GNU General Public License as published by the Free Software | |
10 | Foundation; either version 2, or (at your option) any later version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
14 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | |
15 | details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License along | |
18 | with GCC; see the file COPYING. If not, write to the Free Software | |
19 | Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ | |
20 | ||
21 | #include "config.h" | |
22 | #include "system.h" | |
4977bab6 ZW |
23 | #include "coretypes.h" |
24 | #include "tm.h" | |
ed8d2920 MM |
25 | #include "rtl.h" |
26 | #include "tm_p.h" | |
27 | #include "insn-config.h" | |
28 | #include "recog.h" | |
29 | #include "reload.h" | |
30 | #include "integrate.h" | |
31 | #include "function.h" | |
32 | #include "regs.h" | |
33 | #include "obstack.h" | |
34 | #include "hard-reg-set.h" | |
35 | #include "basic-block.h" | |
36 | #include "df.h" | |
37 | #include "expr.h" | |
38 | #include "output.h" | |
39 | #include "toplev.h" | |
40 | #include "flags.h" | |
41 | #include "ra.h" | |
42 | ||
ed8d2920 MM |
43 | /* This is the toplevel file of a graph coloring register allocator. |
44 | It is able to act like a George & Appel allocator, i.e. with iterative | |
45 | coalescing plus spill coalescing/propagation. | |
46 | And it can act as a traditional Briggs allocator, although with | |
47 | optimistic coalescing. Additionally it has a custom pass, which | |
48 | tries to reduce the overall cost of the colored graph. | |
49 | ||
50 | We support two modes of spilling: spill-everywhere, which is extremely | |
51 | fast, and interference region spilling, which reduces spill code to a | |
52 | large extent, but is slower. | |
53 | ||
54 | Helpful documents: | |
55 | ||
56 | Briggs, P., Cooper, K. D., and Torczon, L. 1994. Improvements to graph | |
57 | coloring register allocation. ACM Trans. Program. Lang. Syst. 16, 3 (May), | |
58 | 428-455. | |
59 | ||
60 | Bergner, P., Dahl, P., Engebretsen, D., and O'Keefe, M. 1997. Spill code | |
61 | minimization via interference region spilling. In Proc. ACM SIGPLAN '97 | |
62 | Conf. on Prog. Language Design and Implementation. ACM, 287-295. | |
63 | ||
64 | George, L., Appel, A.W. 1996. Iterated register coalescing. | |
65 | ACM Trans. Program. Lang. Syst. 18, 3 (May), 300-324. | |
66 | ||
67 | */ | |
68 | ||
69 | /* This file contains the main entry point (reg_alloc), some helper routines | |
70 | used by more than one file of the register allocator, and the toplevel | |
71 | driver procedure (one_pass). */ | |
72 | ||
73 | /* Things, one might do somewhen: | |
74 | ||
75 | * Lattice based rematerialization | |
76 | * create definitions of ever-life regs at the beginning of | |
77 | the insn chain | |
d55d8fc7 | 78 | * insert loads as soon, stores as late as possible |
ed8d2920 MM |
79 | * insert spill insns as outward as possible (either looptree, or LCM) |
80 | * reuse stack-slots | |
81 | * delete coalesced insns. Partly done. The rest can only go, when we get | |
82 | rid of reload. | |
83 | * don't destroy coalescing information completely when spilling | |
84 | * use the constraints from asms | |
85 | */ | |
86 | ||
727d709b | 87 | static int first_hard_reg (HARD_REG_SET); |
ed8d2920 | 88 | static struct obstack ra_obstack; |
93bad80e SB |
89 | static void create_insn_info (struct df *); |
90 | static void free_insn_info (void); | |
91 | static void alloc_mem (struct df *); | |
92 | static void free_mem (struct df *); | |
93 | static void free_all_mem (struct df *df); | |
94 | static int one_pass (struct df *, int); | |
95 | static void check_df (struct df *); | |
96 | static void init_ra (void); | |
ed8d2920 | 97 | |
93bad80e | 98 | void reg_alloc (void); |
ed8d2920 MM |
99 | |
100 | /* These global variables are "internal" to the register allocator. | |
101 | They are all documented at their declarations in ra.h. */ | |
102 | ||
103 | /* Somewhen we want to get rid of one of those sbitmaps. | |
104 | (for now I need the sup_igraph to note if there is any conflict between | |
105 | parts of webs at all. I can't use igraph for this, as there only the real | |
106 | conflicts are noted.) This is only used to prevent coalescing two | |
107 | conflicting webs, were only parts of them are in conflict. */ | |
108 | sbitmap igraph; | |
109 | sbitmap sup_igraph; | |
110 | ||
111 | /* Note the insns not inserted by the allocator, where we detected any | |
112 | deaths of pseudos. It is used to detect closeness of defs and uses. | |
113 | In the first pass this is empty (we could initialize it from REG_DEAD | |
114 | notes), in the other passes it is left from the pass before. */ | |
115 | sbitmap insns_with_deaths; | |
116 | int death_insns_max_uid; | |
117 | ||
118 | struct web_part *web_parts; | |
119 | ||
120 | unsigned int num_webs; | |
121 | unsigned int num_subwebs; | |
122 | unsigned int num_allwebs; | |
123 | struct web **id2web; | |
124 | struct web *hardreg2web[FIRST_PSEUDO_REGISTER]; | |
125 | struct web **def2web; | |
126 | struct web **use2web; | |
127 | struct move_list *wl_moves; | |
128 | int ra_max_regno; | |
129 | short *ra_reg_renumber; | |
130 | struct df *df; | |
131 | bitmap *live_at_end; | |
132 | int ra_pass; | |
133 | unsigned int max_normal_pseudo; | |
134 | int an_unusable_color; | |
135 | ||
136 | /* The different lists on which a web can be (based on the type). */ | |
137 | struct dlist *web_lists[(int) LAST_NODE_TYPE]; | |
138 | ||
139 | unsigned int last_def_id; | |
140 | unsigned int last_use_id; | |
141 | unsigned int last_num_webs; | |
142 | int last_max_uid; | |
143 | sbitmap last_check_uses; | |
144 | unsigned int remember_conflicts; | |
145 | ||
146 | int orig_max_uid; | |
147 | ||
148 | HARD_REG_SET never_use_colors; | |
149 | HARD_REG_SET usable_regs[N_REG_CLASSES]; | |
150 | unsigned int num_free_regs[N_REG_CLASSES]; | |
727d709b | 151 | int single_reg_in_regclass[N_REG_CLASSES]; |
ed8d2920 | 152 | HARD_REG_SET hardregs_for_mode[NUM_MACHINE_MODES]; |
50aac998 | 153 | HARD_REG_SET invalid_mode_change_regs; |
ed8d2920 MM |
154 | unsigned char byte2bitcount[256]; |
155 | ||
156 | unsigned int debug_new_regalloc = -1; | |
157 | int flag_ra_biased = 0; | |
158 | int flag_ra_improved_spilling = 0; | |
159 | int flag_ra_ir_spilling = 0; | |
160 | int flag_ra_optimistic_coalescing = 0; | |
161 | int flag_ra_break_aliases = 0; | |
162 | int flag_ra_merge_spill_costs = 0; | |
163 | int flag_ra_spill_every_use = 0; | |
164 | int flag_ra_dump_notes = 0; | |
165 | ||
166 | /* Fast allocation of small objects, which live until the allocator | |
167 | is done. Allocate an object of SIZE bytes. */ | |
168 | ||
169 | void * | |
93bad80e | 170 | ra_alloc (size_t size) |
ed8d2920 MM |
171 | { |
172 | return obstack_alloc (&ra_obstack, size); | |
173 | } | |
174 | ||
175 | /* Like ra_alloc(), but clear the returned memory. */ | |
176 | ||
177 | void * | |
93bad80e | 178 | ra_calloc (size_t size) |
ed8d2920 MM |
179 | { |
180 | void *p = obstack_alloc (&ra_obstack, size); | |
181 | memset (p, 0, size); | |
182 | return p; | |
183 | } | |
184 | ||
185 | /* Returns the number of hardregs in HARD_REG_SET RS. */ | |
186 | ||
187 | int | |
93bad80e | 188 | hard_regs_count (HARD_REG_SET rs) |
ed8d2920 MM |
189 | { |
190 | int count = 0; | |
191 | #ifdef HARD_REG_SET | |
192 | while (rs) | |
193 | { | |
194 | unsigned char byte = rs & 0xFF; | |
195 | rs >>= 8; | |
196 | /* Avoid memory access, if nothing is set. */ | |
197 | if (byte) | |
198 | count += byte2bitcount[byte]; | |
199 | } | |
200 | #else | |
201 | unsigned int ofs; | |
202 | for (ofs = 0; ofs < HARD_REG_SET_LONGS; ofs++) | |
203 | { | |
204 | HARD_REG_ELT_TYPE elt = rs[ofs]; | |
205 | while (elt) | |
206 | { | |
207 | unsigned char byte = elt & 0xFF; | |
208 | elt >>= 8; | |
209 | if (byte) | |
210 | count += byte2bitcount[byte]; | |
211 | } | |
212 | } | |
213 | #endif | |
214 | return count; | |
215 | } | |
216 | ||
727d709b PH |
217 | /* Returns the first hardreg in HARD_REG_SET RS. Assumes there is at |
218 | least one reg in the set. */ | |
219 | ||
220 | static int | |
221 | first_hard_reg (HARD_REG_SET rs) | |
222 | { | |
223 | int c; | |
224 | for (c = 0; c < FIRST_PSEUDO_REGISTER && !TEST_HARD_REG_BIT (rs, c); c++) | |
225 | if (c == FIRST_PSEUDO_REGISTER) | |
226 | abort(); | |
227 | return c; | |
228 | } | |
229 | ||
ed8d2920 MM |
230 | /* Basically like emit_move_insn (i.e. validifies constants and such), |
231 | but also handle MODE_CC moves (but then the operands must already | |
232 | be basically valid. */ | |
233 | ||
234 | rtx | |
93bad80e | 235 | ra_emit_move_insn (rtx x, rtx y) |
ed8d2920 MM |
236 | { |
237 | enum machine_mode mode = GET_MODE (x); | |
238 | if (GET_MODE_CLASS (mode) == MODE_CC) | |
239 | return emit_insn (gen_move_insn (x, y)); | |
240 | else | |
241 | return emit_move_insn (x, y); | |
242 | } | |
243 | ||
244 | int insn_df_max_uid; | |
245 | struct ra_insn_info *insn_df; | |
246 | static struct ref **refs_for_insn_df; | |
247 | ||
248 | /* Create the insn_df structure for each insn to have fast access to | |
249 | all valid defs and uses in an insn. */ | |
250 | ||
static void
create_insn_info (struct df *df)
{
  rtx insn;
  struct ref **act_refs;  /* Cursor into the shared refs array.  */
  insn_df_max_uid = get_max_uid ();
  insn_df = xcalloc (insn_df_max_uid, sizeof (insn_df[0]));
  /* One flat array holds all def and use ref pointers; each insn's
     defs/uses fields below point into consecutive slices of it.  */
  refs_for_insn_df = xcalloc (df->def_id + df->use_id, sizeof (struct ref *));
  act_refs = refs_for_insn_df;
  /* We create those things backwards to mimic the order in which
     the insns are visited in rewrite_program2() and live_in().  */
  for (insn = get_last_insn (); insn; insn = PREV_INSN (insn))
    {
      int uid = INSN_UID (insn);
      unsigned int n;
      struct df_link *link;
      if (!INSN_P (insn))
        continue;
      /* Collect the defs of this insn, skipping NULL refs and refs to
         hardregs we may never allocate (never_use_colors).  */
      for (n = 0, link = DF_INSN_DEFS (df, insn); link; link = link->next)
        if (link->ref
            && (DF_REF_REGNO (link->ref) >= FIRST_PSEUDO_REGISTER
                || !TEST_HARD_REG_BIT (never_use_colors,
                                       DF_REF_REGNO (link->ref))))
          {
            /* The slice base is recorded lazily at the first kept ref.  */
            if (n == 0)
              insn_df[uid].defs = act_refs;
            insn_df[uid].defs[n++] = link->ref;
          }
      act_refs += n;
      insn_df[uid].num_defs = n;
      /* The same for the uses of this insn.  */
      for (n = 0, link = DF_INSN_USES (df, insn); link; link = link->next)
        if (link->ref
            && (DF_REF_REGNO (link->ref) >= FIRST_PSEUDO_REGISTER
                || !TEST_HARD_REG_BIT (never_use_colors,
                                       DF_REF_REGNO (link->ref))))
          {
            if (n == 0)
              insn_df[uid].uses = act_refs;
            insn_df[uid].uses[n++] = link->ref;
          }
      act_refs += n;
      insn_df[uid].num_uses = n;
    }
  /* The cursor may never move past the end of the allocation; each ref
     is kept at most once, so def_id + use_id slots suffice.  */
  if (refs_for_insn_df + (df->def_id + df->use_id) < act_refs)
    abort ();
}
297 | ||
298 | /* Free the insn_df structures. */ | |
299 | ||
300 | static void | |
93bad80e | 301 | free_insn_info (void) |
ed8d2920 MM |
302 | { |
303 | free (refs_for_insn_df); | |
304 | refs_for_insn_df = NULL; | |
305 | free (insn_df); | |
306 | insn_df = NULL; | |
307 | insn_df_max_uid = 0; | |
308 | } | |
309 | ||
310 | /* Search WEB for a subweb, which represents REG. REG needs to | |
311 | be a SUBREG, and the inner reg of it needs to be the one which is | |
312 | represented by WEB. Returns the matching subweb or NULL. */ | |
313 | ||
314 | struct web * | |
93bad80e | 315 | find_subweb (struct web *web, rtx reg) |
ed8d2920 MM |
316 | { |
317 | struct web *w; | |
318 | if (GET_CODE (reg) != SUBREG) | |
319 | abort (); | |
320 | for (w = web->subreg_next; w; w = w->subreg_next) | |
321 | if (GET_MODE (w->orig_x) == GET_MODE (reg) | |
322 | && SUBREG_BYTE (w->orig_x) == SUBREG_BYTE (reg)) | |
323 | return w; | |
324 | return NULL; | |
325 | } | |
326 | ||
327 | /* Similar to find_subweb(), but matches according to SIZE_WORD, | |
328 | a collection of the needed size and offset (in bytes). */ | |
329 | ||
330 | struct web * | |
93bad80e | 331 | find_subweb_2 (struct web *web, unsigned int size_word) |
ed8d2920 MM |
332 | { |
333 | struct web *w = web; | |
334 | if (size_word == GET_MODE_SIZE (GET_MODE (web->orig_x))) | |
335 | /* size_word == size means BYTE_BEGIN(size_word) == 0. */ | |
336 | return web; | |
337 | for (w = web->subreg_next; w; w = w->subreg_next) | |
338 | { | |
339 | unsigned int bl = rtx_to_bits (w->orig_x); | |
340 | if (size_word == bl) | |
341 | return w; | |
342 | } | |
343 | return NULL; | |
344 | } | |
345 | ||
346 | /* Returns the superweb for SUBWEB. */ | |
347 | ||
348 | struct web * | |
93bad80e | 349 | find_web_for_subweb_1 (struct web *subweb) |
ed8d2920 MM |
350 | { |
351 | while (subweb->parent_web) | |
352 | subweb = subweb->parent_web; | |
353 | return subweb; | |
354 | } | |
355 | ||
356 | /* Determine if two hard register sets intersect. | |
357 | Return 1 if they do. */ | |
358 | ||
359 | int | |
93bad80e | 360 | hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b) |
ed8d2920 MM |
361 | { |
362 | HARD_REG_SET c; | |
363 | COPY_HARD_REG_SET (c, *a); | |
364 | AND_HARD_REG_SET (c, *b); | |
365 | GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose); | |
366 | return 1; | |
367 | lose: | |
368 | return 0; | |
369 | } | |
370 | ||
371 | /* Allocate and initialize the memory necessary for one pass of the | |
372 | register allocator. */ | |
373 | ||
static void
alloc_mem (struct df *df)
{
  int i;
  ra_build_realloc (df);
  /* live_at_end is allocated once (first pass) and kept across
     passes; free_mem() deliberately does not touch it.  */
  if (!live_at_end)
    {
      /* Two extra slots are allocated in front of the entry for basic
         block 0; they appear to be for the entry/exit blocks accessed
         via negative indices -- TODO confirm against the users of
         live_at_end.  */
      live_at_end = xmalloc ((last_basic_block + 2) * sizeof (bitmap));
      for (i = 0; i < last_basic_block + 2; i++)
        live_at_end[i] = BITMAP_XMALLOC ();
      /* Shift the base pointer so live_at_end[-2] and live_at_end[-1]
         are valid; free_all_mem() undoes this before freeing.  */
      live_at_end += 2;
    }
  create_insn_info (df);
}
388 | ||
389 | /* Free the memory which isn't necessary for the next pass. */ | |
390 | ||
static void
free_mem (struct df *df ATTRIBUTE_UNUSED)
{
  /* Only the per-insn ref info and the build-phase structures are
     released; live_at_end survives until free_all_mem().  */
  free_insn_info ();
  ra_build_free ();
}
397 | ||
398 | /* Free all memory allocated for the register allocator. Used, when | |
399 | it's done. */ | |
400 | ||
static void
free_all_mem (struct df *df)
{
  unsigned int i;
  /* Undo the "live_at_end += 2" adjustment made in alloc_mem() so the
     original xmalloc'ed base pointer is what gets freed.  */
  live_at_end -= 2;
  for (i = 0; i < (unsigned)last_basic_block + 2; i++)
    BITMAP_XFREE (live_at_end[i]);
  free (live_at_end);

  ra_colorize_free_all ();
  ra_build_free_all (df);
  /* Freeing a NULL object releases everything ever allocated from the
     obstack, i.e. all ra_alloc()/ra_calloc() memory at once.  */
  obstack_free (&ra_obstack, NULL);
}
414 | ||
415 | static long ticks_build; | |
416 | static long ticks_rebuild; | |
417 | ||
418 | /* Perform one pass of allocation. Returns nonzero, if some spill code | |
419 | was added, i.e. if the allocator needs to rerun. */ | |
420 | ||
421 | static int | |
93bad80e | 422 | one_pass (struct df *df, int rebuild) |
ed8d2920 MM |
423 | { |
424 | long ticks = clock (); | |
425 | int something_spilled; | |
426 | remember_conflicts = 0; | |
427 | ||
428 | /* Build the complete interference graph, or if this is not the first | |
429 | pass, rebuild it incrementally. */ | |
430 | build_i_graph (df); | |
431 | ||
432 | /* From now on, if we create new conflicts, we need to remember the | |
433 | initial list of conflicts per web. */ | |
434 | remember_conflicts = 1; | |
435 | if (!rebuild) | |
436 | dump_igraph_machine (); | |
437 | ||
438 | /* Colorize the I-graph. This results in either a list of | |
439 | spilled_webs, in which case we need to run the spill phase, and | |
440 | rerun the allocator, or that list is empty, meaning we are done. */ | |
441 | ra_colorize_graph (df); | |
442 | ||
443 | last_max_uid = get_max_uid (); | |
444 | /* actual_spill() might change WEBS(SPILLED) and even empty it, | |
445 | so we need to remember it's state. */ | |
446 | something_spilled = !!WEBS(SPILLED); | |
447 | ||
448 | /* Add spill code if necessary. */ | |
449 | if (something_spilled) | |
450 | actual_spill (); | |
451 | ||
452 | ticks = clock () - ticks; | |
453 | if (rebuild) | |
454 | ticks_rebuild += ticks; | |
455 | else | |
456 | ticks_build += ticks; | |
457 | return something_spilled; | |
458 | } | |
459 | ||
460 | /* Initialize various arrays for the register allocator. */ | |
461 | ||
static void
init_ra (void)
{
  int i;
  HARD_REG_SET rs;
#ifdef ELIMINABLE_REGS
  static const struct {const int from, to; } eliminables[] = ELIMINABLE_REGS;
  unsigned int j;
#endif
  /* Whether this function will need a frame pointer.  */
  int need_fp
    = (! flag_omit_frame_pointer
       || (current_function_calls_alloca && EXIT_IGNORE_STACK)
       || FRAME_POINTER_REQUIRED);

  ra_colorize_init ();

  /* We can't ever use any of the fixed regs.  */
  COPY_HARD_REG_SET (never_use_colors, fixed_reg_set);

  /* Additionally don't even try to use hardregs, which we already
     know are not eliminable.  This includes also either the
     hard framepointer or all regs which are eliminable into the
     stack pointer, if need_fp is set.  */
#ifdef ELIMINABLE_REGS
  for (j = 0; j < ARRAY_SIZE (eliminables); j++)
    {
      if (! CAN_ELIMINATE (eliminables[j].from, eliminables[j].to)
          || (eliminables[j].to == STACK_POINTER_REGNUM && need_fp))
        for (i = hard_regno_nregs[eliminables[j].from][Pmode]; i--;)
          SET_HARD_REG_BIT (never_use_colors, eliminables[j].from + i);
    }
#if FRAME_POINTER_REGNUM != HARD_FRAME_POINTER_REGNUM
  if (need_fp)
    for (i = hard_regno_nregs[HARD_FRAME_POINTER_REGNUM][Pmode]; i--;)
      SET_HARD_REG_BIT (never_use_colors, HARD_FRAME_POINTER_REGNUM + i);
#endif

#else
  if (need_fp)
    for (i = hard_regno_nregs[FRAME_POINTER_REGNUM][Pmode]; i--;)
      SET_HARD_REG_BIT (never_use_colors, FRAME_POINTER_REGNUM + i);
#endif

  /* Stack and argument pointer are also rather useless to us.  */
  for (i = hard_regno_nregs[STACK_POINTER_REGNUM][Pmode]; i--;)
    SET_HARD_REG_BIT (never_use_colors, STACK_POINTER_REGNUM + i);

  for (i = hard_regno_nregs[ARG_POINTER_REGNUM][Pmode]; i--;)
    SET_HARD_REG_BIT (never_use_colors, ARG_POINTER_REGNUM + i);

  /* Precompute the per-byte popcount table used by hard_regs_count().  */
  for (i = 0; i < 256; i++)
    {
      unsigned char byte = ((unsigned) i) & 0xFF;
      unsigned char count = 0;
      while (byte)
        {
          if (byte & 1)
            count++;
          byte >>= 1;
        }
      byte2bitcount[i] = count;
    }

  /* Per register class: record the usable (non-excluded) regs, how
     many there are, and -- if exactly one -- which one it is.  */
  for (i = 0; i < N_REG_CLASSES; i++)
    {
      int size;
      COPY_HARD_REG_SET (rs, reg_class_contents[i]);
      AND_COMPL_HARD_REG_SET (rs, never_use_colors);
      size = hard_regs_count (rs);
      num_free_regs[i] = size;
      COPY_HARD_REG_SET (usable_regs[i], rs);
      if (size == 1)
        single_reg_in_regclass[i] = first_hard_reg (rs);
      else
        single_reg_in_regclass[i] = -1;
    }

  /* Setup hardregs_for_mode[].
     We are not interested only in the beginning of a multi-reg, but in
     all the hardregs involved.  Maybe HARD_REGNO_MODE_OK() only ok's
     for beginnings.  */
  for (i = 0; i < NUM_MACHINE_MODES; i++)
    {
      int reg, size;
      CLEAR_HARD_REG_SET (rs);
      for (reg = 0; reg < FIRST_PSEUDO_REGISTER; reg++)
        if (HARD_REGNO_MODE_OK (reg, i)
            /* Ignore VOIDmode and similar things.  */
            && (size = hard_regno_nregs[reg][i]) != 0
            && (reg + size) <= FIRST_PSEUDO_REGISTER)
          {
            while (size--)
              SET_HARD_REG_BIT (rs, reg + size);
          }
      COPY_HARD_REG_SET (hardregs_for_mode[i], rs);
    }

  CLEAR_HARD_REG_SET (invalid_mode_change_regs);
#ifdef CANNOT_CHANGE_MODE_CLASS
  /* NOTE(review): this computation is deliberately disabled by the
     "if (0)", leaving invalid_mode_change_regs empty -- the reason is
     not visible here; TODO confirm before re-enabling.  */
  if (0)
    for (i = 0; i < NUM_MACHINE_MODES; i++)
      {
        enum machine_mode from = (enum machine_mode) i;
        enum machine_mode to;
        for (to = VOIDmode; to < MAX_MACHINE_MODE; ++to)
          {
            int r;
            for (r = 0; r < FIRST_PSEUDO_REGISTER; r++)
              if (REG_CANNOT_CHANGE_MODE_P (from, to, r))
                SET_HARD_REG_BIT (invalid_mode_change_regs, r);
          }
      }
#endif

  /* Pick the first excluded hardreg as an_unusable_color; presumably
     this serves as a sentinel color elsewhere -- see its users.  There
     must be at least one (fixed regs are always excluded).  */
  for (an_unusable_color = 0; an_unusable_color < FIRST_PSEUDO_REGISTER;
       an_unusable_color++)
    if (TEST_HARD_REG_BIT (never_use_colors, an_unusable_color))
      break;
  if (an_unusable_color == FIRST_PSEUDO_REGISTER)
    abort ();

  orig_max_uid = get_max_uid ();
  compute_bb_for_insn ();
  ra_reg_renumber = NULL;
  /* Initially assume every insn may contain a death (see the comment
     at the declaration of insns_with_deaths).  */
  insns_with_deaths = sbitmap_alloc (orig_max_uid);
  death_insns_max_uid = orig_max_uid;
  sbitmap_ones (insns_with_deaths);
  gcc_obstack_init (&ra_obstack);
}
591 | ||
592 | /* Check the consistency of DF. This aborts if it violates some | |
593 | invariances we expect. */ | |
594 | ||
595 | static void | |
93bad80e | 596 | check_df (struct df *df) |
ed8d2920 MM |
597 | { |
598 | struct df_link *link; | |
599 | rtx insn; | |
600 | int regno; | |
601 | unsigned int ui; | |
602 | bitmap b = BITMAP_XMALLOC (); | |
603 | bitmap empty_defs = BITMAP_XMALLOC (); | |
604 | bitmap empty_uses = BITMAP_XMALLOC (); | |
605 | ||
606 | /* Collect all the IDs of NULL references in the ID->REF arrays, | |
607 | as df.c leaves them when updating the df structure. */ | |
608 | for (ui = 0; ui < df->def_id; ui++) | |
609 | if (!df->defs[ui]) | |
610 | bitmap_set_bit (empty_defs, ui); | |
611 | for (ui = 0; ui < df->use_id; ui++) | |
612 | if (!df->uses[ui]) | |
613 | bitmap_set_bit (empty_uses, ui); | |
614 | ||
615 | /* For each insn we check if the chain of references contain each | |
616 | ref only once, doesn't contain NULL refs, or refs whose ID is invalid | |
617 | (it df->refs[id] element is NULL). */ | |
618 | for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) | |
619 | if (INSN_P (insn)) | |
620 | { | |
621 | bitmap_clear (b); | |
622 | for (link = DF_INSN_DEFS (df, insn); link; link = link->next) | |
623 | if (!link->ref || bitmap_bit_p (empty_defs, DF_REF_ID (link->ref)) | |
624 | || bitmap_bit_p (b, DF_REF_ID (link->ref))) | |
625 | abort (); | |
626 | else | |
627 | bitmap_set_bit (b, DF_REF_ID (link->ref)); | |
628 | ||
629 | bitmap_clear (b); | |
630 | for (link = DF_INSN_USES (df, insn); link; link = link->next) | |
631 | if (!link->ref || bitmap_bit_p (empty_uses, DF_REF_ID (link->ref)) | |
632 | || bitmap_bit_p (b, DF_REF_ID (link->ref))) | |
633 | abort (); | |
634 | else | |
635 | bitmap_set_bit (b, DF_REF_ID (link->ref)); | |
636 | } | |
637 | ||
638 | /* Now the same for the chains per register number. */ | |
639 | for (regno = 0; regno < max_reg_num (); regno++) | |
640 | { | |
641 | bitmap_clear (b); | |
642 | for (link = df->regs[regno].defs; link; link = link->next) | |
643 | if (!link->ref || bitmap_bit_p (empty_defs, DF_REF_ID (link->ref)) | |
644 | || bitmap_bit_p (b, DF_REF_ID (link->ref))) | |
645 | abort (); | |
646 | else | |
647 | bitmap_set_bit (b, DF_REF_ID (link->ref)); | |
648 | ||
649 | bitmap_clear (b); | |
650 | for (link = df->regs[regno].uses; link; link = link->next) | |
651 | if (!link->ref || bitmap_bit_p (empty_uses, DF_REF_ID (link->ref)) | |
652 | || bitmap_bit_p (b, DF_REF_ID (link->ref))) | |
653 | abort (); | |
654 | else | |
655 | bitmap_set_bit (b, DF_REF_ID (link->ref)); | |
656 | } | |
657 | ||
658 | BITMAP_XFREE (empty_uses); | |
659 | BITMAP_XFREE (empty_defs); | |
660 | BITMAP_XFREE (b); | |
661 | } | |
662 | ||
663 | /* Main register allocator entry point. */ | |
664 | ||
void
reg_alloc (void)
{
  int changed;
  /* Several phases below temporarily clear dump_file; keep the real
     one here so it can be restored each time.  */
  FILE *ra_dump_file = dump_file;
  rtx last = get_last_insn ();

  if (! INSN_P (last))
    last = prev_real_insn (last);
  /* If this is an empty function we shouldn't do all the following,
     but instead just setup what's necessary, and return.  */

  /* We currently rely on the existence of the return value USE as
     one of the last insns.  Add it if it's not there anymore.  */
  if (last)
    {
      edge e;
      /* Check each predecessor of the exit block.  */
      for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
        {
          basic_block bb = e->src;
          last = BB_END (bb);
          if (!INSN_P (last) || GET_CODE (PATTERN (last)) != USE)
            {
              rtx insns;
              start_sequence ();
              use_return_register ();
              insns = get_insns ();
              end_sequence ();
              emit_insn_after (insns, last);
            }
        }
    }

  /* Setup debugging levels.  The constant switch selects one of the
     hard-coded presets below (currently case 0, i.e. DUMP_EVER).  */
  switch (0)
    {
      /* Some useful presets of the debug level, I often use.  */
      case 0: debug_new_regalloc = DUMP_EVER; break;
      case 1: debug_new_regalloc = DUMP_COSTS; break;
      case 2: debug_new_regalloc = DUMP_IGRAPH_M; break;
      case 3: debug_new_regalloc = DUMP_COLORIZE + DUMP_COSTS; break;
      case 4: debug_new_regalloc = DUMP_COLORIZE + DUMP_COSTS + DUMP_WEBS;
              break;
      case 5: debug_new_regalloc = DUMP_FINAL_RTL + DUMP_COSTS +
              DUMP_CONSTRAINTS;
              break;
      case 6: debug_new_regalloc = DUMP_VALIDIFY; break;
    }
  if (!dump_file)
    debug_new_regalloc = 0;

  /* Run regclass first, so we know the preferred and alternate classes
     for each pseudo.  Deactivate emitting of debug info, if it's not
     explicitly requested.  */
  if ((debug_new_regalloc & DUMP_REGCLASS) == 0)
    dump_file = NULL;
  regclass (get_insns (), max_reg_num (), dump_file);
  dump_file = ra_dump_file;

  /* We don't use those NOTEs, and as we anyway change all registers,
     they only make problems later.  */
  count_or_remove_death_notes (NULL, 1);

  /* Initialize the different global arrays and regsets.  */
  init_ra ();

  /* And some global variables.  */
  ra_pass = 0;
  no_new_pseudos = 0;
  max_normal_pseudo = (unsigned) max_reg_num ();
  ra_rewrite_init ();
  last_def_id = 0;
  last_use_id = 0;
  last_num_webs = 0;
  last_max_uid = 0;
  last_check_uses = NULL;
  live_at_end = NULL;
  WEBS(INITIAL) = NULL;
  WEBS(FREE) = NULL;
  memset (hardreg2web, 0, sizeof (hardreg2web));
  ticks_build = ticks_rebuild = 0;

  /* The default is to use optimistic coalescing with interference
     region spilling, without biased coloring.  */
  flag_ra_biased = 0;
  flag_ra_spill_every_use = 0;
  flag_ra_improved_spilling = 1;
  flag_ra_ir_spilling = 1;
  flag_ra_break_aliases = 0;
  flag_ra_optimistic_coalescing = 1;
  flag_ra_merge_spill_costs = 1;
  if (flag_ra_optimistic_coalescing)
    flag_ra_break_aliases = 1;
  flag_ra_dump_notes = 0;

  /* Allocate the global df structure.  */
  df = df_init ();

  /* This is the main loop, calling one_pass as long as there are still
     some spilled webs.  */
  do
    {
      ra_debug_msg (DUMP_NEARLY_EVER, "RegAlloc Pass %d\n\n", ra_pass);
      /* Hard cap on the number of passes; spilling must converge well
         before this.  */
      if (ra_pass++ > 40)
        internal_error ("Didn't find a coloring.\n");

      /* First collect all the register refs and put them into
         chains per insn, and per regno.  In later passes only update
         that info from the new and modified insns.  */
      df_analyze (df, (ra_pass == 1) ? 0 : (bitmap) -1,
                  DF_HARD_REGS | DF_RD_CHAIN | DF_RU_CHAIN | DF_FOR_REGALLOC);

      if ((debug_new_regalloc & DUMP_DF) != 0)
        {
          rtx insn;
          df_dump (df, DF_HARD_REGS, dump_file);
          for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
            if (INSN_P (insn))
              df_insn_debug_regno (df, insn, dump_file);
        }
      /* Sanity-check the df info; aborts on inconsistencies.  */
      check_df (df);

      /* Now allocate the memory needed for this pass, or (if it's not the
         first pass), reallocate only additional memory.  */
      alloc_mem (df);

      /* Build and colorize the interference graph, and possibly emit
         spill insns.  This also might delete certain move insns.  */
      changed = one_pass (df, ra_pass > 1);

      /* If that produced no changes, the graph was colorizable.  */
      if (!changed)
        {
          /* Change the insns to refer to the new pseudos (one per web).  */
          emit_colors (df);
          /* Already setup a preliminary reg_renumber[] array, but don't
             free our own version.  reg_renumber[] will again be destroyed
             later.  We right now need it in dump_constraints() for
             constrain_operands(1) whose subproc sometimes reference
             it (because we are checking strictly, i.e. as if
             after reload).  */
          setup_renumber (0);
          /* Delete some more of the coalesced moves.  */
          delete_moves ();
          dump_constraints ();
        }
      else
        {
          /* If there were changes, this means spill code was added,
             therefore repeat some things, including some initialization
             of global data structures.  */
          if ((debug_new_regalloc & DUMP_REGCLASS) == 0)
            dump_file = NULL;
          /* We have new pseudos (the stackwebs).  */
          allocate_reg_info (max_reg_num (), FALSE, FALSE);
          /* And new insns.  */
          compute_bb_for_insn ();
          /* Some of them might be dead.  */
          delete_trivially_dead_insns (get_insns (), max_reg_num ());
          /* Those new pseudos need to have their REFS count set.  */
          reg_scan_update (get_insns (), NULL, max_regno);
          max_regno = max_reg_num ();
          /* And they need useful classes too.  */
          regclass (get_insns (), max_reg_num (), dump_file);
          dump_file = ra_dump_file;

          /* Remember the number of defs and uses, so we can distinguish
             new from old refs in the next pass.  */
          last_def_id = df->def_id;
          last_use_id = df->use_id;
        }

      /* Output the graph, and possibly the current insn sequence.  */
      dump_ra (df);
      if (changed && (debug_new_regalloc & DUMP_RTL) != 0)
        {
          ra_print_rtl_with_bb (dump_file, get_insns ());
          fflush (dump_file);
        }

      /* Reset the web lists.  */
      reset_lists ();
      free_mem (df);
    }
  while (changed);

  /* We are done with allocation, free all memory and output some
     debug info.  */
  free_all_mem (df);
  df_finish (df);
  if ((debug_new_regalloc & DUMP_RESULTS) == 0)
    dump_cost (DUMP_COSTS);
  ra_debug_msg (DUMP_COSTS, "ticks for build-phase: %ld\n", ticks_build);
  ra_debug_msg (DUMP_COSTS, "ticks for rebuild-phase: %ld\n", ticks_rebuild);
  if ((debug_new_regalloc & (DUMP_FINAL_RTL | DUMP_RTL)) != 0)
    ra_print_rtl_with_bb (dump_file, get_insns ());

  /* We might have new pseudos, so allocate the info arrays for them.  */
  if ((debug_new_regalloc & DUMP_SM) == 0)
    dump_file = NULL;
  no_new_pseudos = 0;
  allocate_reg_info (max_reg_num (), FALSE, FALSE);
  no_new_pseudos = 1;
  dump_file = ra_dump_file;

  /* Some spill insns could've been inserted after trapping calls, i.e.
     at the end of a basic block, which really ends at that call.
     Fixup that breakages by adjusting basic block boundaries.  */
  fixup_abnormal_edges ();

  /* Cleanup the flow graph.  */
  if ((debug_new_regalloc & DUMP_LAST_FLOW) == 0)
    dump_file = NULL;
  life_analysis (dump_file,
                 PROP_DEATH_NOTES | PROP_LOG_LINKS | PROP_REG_INFO);
  cleanup_cfg (CLEANUP_EXPENSIVE);
  recompute_reg_usage (get_insns (), TRUE);
  if (dump_file)
    dump_flow_info (dump_file);
  dump_file = ra_dump_file;

  /* update_equiv_regs() can't be called after register allocation.
     It might delete some pseudos, and insert other insns setting
     up those pseudos in different places.  This of course screws up
     the allocation because that may destroy a hardreg for another
     pseudo.
     XXX we probably should do something like that on our own.  I.e.
     creating REG_EQUIV notes.  */
  /*update_equiv_regs ();*/

  /* Setup the reg_renumber[] array for reload.  */
  setup_renumber (1);
  sbitmap_free (insns_with_deaths);

  /* Remove REG_DEAD notes which are incorrectly set.  See the docu
     of that function.  */
  remove_suspicious_death_notes ();

  if ((debug_new_regalloc & DUMP_LAST_RTL) != 0)
    ra_print_rtl_with_bb (dump_file, get_insns ());
  dump_static_insn_cost (dump_file,
                         "after allocation/spilling, before reload", NULL);

  /* Allocate the reg_equiv_memory_loc array for reload.  */
  VARRAY_GROW (reg_equiv_memory_loc_varray, max_regno);
  reg_equiv_memory_loc = &VARRAY_RTX (reg_equiv_memory_loc_varray, 0);
  /* And possibly initialize it.  */
  allocate_initial_values (reg_equiv_memory_loc);
  /* And one last regclass pass just before reload.  */
  regclass (get_insns (), max_reg_num (), dump_file);
}
916 | ||
917 | /* | |
918 | vim:cinoptions={.5s,g0,p5,t0,(0,^-0.5s,n-0.5s:tw=78:cindent:sw=4: | |
919 | */ |