]>
Commit | Line | Data |
---|---|---|
f4e72d6e | 1 | /* Generic partial redundancy elimination with lazy code motion support. |
9311a396 | 2 | Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. |
d2ecda27 JL |
3 | |
4 | This file is part of GNU CC. | |
5 | ||
6 | GNU CC is free software; you can redistribute it and/or modify | |
7 | it under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 2, or (at your option) | |
9 | any later version. | |
10 | ||
11 | GNU CC is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GNU CC; see the file COPYING. If not, write to | |
18 | the Free Software Foundation, 59 Temple Place - Suite 330, | |
19 | Boston, MA 02111-1307, USA. */ | |
20 | ||
21 | /* These routines are meant to be used by various optimization | |
22 | passes which can be modeled as lazy code motion problems. | |
23 | Including, but not limited to: | |
24 | ||
25 | * Traditional partial redundancy elimination. | |
26 | ||
27 | * Placement of caller/callee register save/restores. |
28 | ||
29 | * Load/store motion. | |
30 | ||
31 | * Copy motion. | |
32 | ||
33 | * Conversion of flat register files to a stacked register | |
34 | model. | |
35 | ||
36 | * Dead load/store elimination. | |
37 | ||
38 | These routines accept as input: | |
39 | ||
40 | * Basic block information (number of blocks, lists of | |
41 | predecessors and successors). Note the granularity | |
42 | does not need to be basic block, they could be statements | |
43 | or functions. | |
44 | ||
45 | * Bitmaps of local properties (computed, transparent and | |
46 | anticipatable expressions). | |
47 | ||
48 | The output of these routines is bitmap of redundant computations | |
49 | and a bitmap of optimal placement points. */ | |
50 | ||
51 | ||
52 | #include "config.h" | |
53 | #include "system.h" | |
d2ecda27 JL |
54 | #include "rtl.h" |
55 | #include "regs.h" | |
56 | #include "hard-reg-set.h" | |
57 | #include "flags.h" | |
58 | #include "real.h" | |
59 | #include "insn-config.h" | |
60 | #include "recog.h" | |
61 | #include "basic-block.h" | |
9f09b1f2 | 62 | #include "tm_p.h" |
f4e72d6e | 63 | |
9f09b1f2 R |
64 | /* We want target macros for the mode switching code to be able to refer |
65 | to instruction attribute values. */ | |
66 | #include "insn-attr.h" | |
d2ecda27 | 67 | |
a42cd965 | 68 | /* Edge based LCM routines. */ |
f4e72d6e RK |
69 | static void compute_antinout_edge PARAMS ((sbitmap *, sbitmap *, |
70 | sbitmap *, sbitmap *)); | |
71 | static void compute_earliest PARAMS ((struct edge_list *, int, | |
72 | sbitmap *, sbitmap *, | |
73 | sbitmap *, sbitmap *, | |
74 | sbitmap *)); | |
75 | static void compute_laterin PARAMS ((struct edge_list *, sbitmap *, | |
76 | sbitmap *, sbitmap *, | |
77 | sbitmap *)); | |
78 | static void compute_insert_delete PARAMS ((struct edge_list *edge_list, | |
79 | sbitmap *, sbitmap *, | |
80 | sbitmap *, sbitmap *, | |
81 | sbitmap *)); | |
a42cd965 AM |
82 | |
83 | /* Edge based LCM routines on a reverse flowgraph. */ | |
f4e72d6e RK |
84 | static void compute_farthest PARAMS ((struct edge_list *, int, |
85 | sbitmap *, sbitmap *, | |
86 | sbitmap*, sbitmap *, | |
87 | sbitmap *)); | |
88 | static void compute_nearerout PARAMS ((struct edge_list *, sbitmap *, | |
89 | sbitmap *, sbitmap *, | |
90 | sbitmap *)); | |
91 | static void compute_rev_insert_delete PARAMS ((struct edge_list *edge_list, | |
92 | sbitmap *, sbitmap *, | |
93 | sbitmap *, sbitmap *, | |
94 | sbitmap *)); | |
a42cd965 AM |
95 | \f |
96 | /* Edge based lcm routines. */ | |
97 | ||
98 | /* Compute expression anticipatability at entrance and exit of each block. | |
99 | This is done based on the flow graph, and not on the pred-succ lists. | |
100 | Other than that, it's pretty much identical to compute_antinout. */ |
d2ecda27 JL |
101 | |
static void
compute_antinout_edge (antloc, transp, antin, antout)
     sbitmap *antloc;
     sbitmap *transp;
     sbitmap *antin;
     sbitmap *antout;
{
  int bb;
  edge e;
  /* The worklist is kept as a circular queue: QIN is where new entries
     are enqueued, QOUT where they are dequeued, QEND is one past the
     end of the backing array, and QLEN counts the live entries.  */
  basic_block *worklist, *qin, *qout, *qend;
  unsigned int qlen;

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist
    = (basic_block *) xmalloc (sizeof (basic_block) * n_basic_blocks);

  /* We want a maximal solution, so make an optimistic initialization of
     ANTIN.  */
  sbitmap_vector_ones (antin, n_basic_blocks);

  /* Put every block on the worklist; this is necessary because of the
     optimistic initialization of ANTIN above.  A block's AUX field being
     non-NULL marks it as currently on the worklist.  */
  for (bb = n_basic_blocks - 1; bb >= 0; bb--)
    {
      *qin++ = BASIC_BLOCK (bb);
      BASIC_BLOCK (bb)->aux = BASIC_BLOCK (bb);
    }

  /* Rewind QIN; the queue is now full, so enqueuing wraps from here.  */
  qin = worklist;
  qend = &worklist[n_basic_blocks];
  qlen = n_basic_blocks;

  /* Mark blocks which are predecessors of the exit block so that we
     can easily identify them below.  */
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    e->src->aux = EXIT_BLOCK_PTR;

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      basic_block b = *qout++;
      bb = b->index;
      qlen--;

      if (qout >= qend)
	qout = worklist;

      if (b->aux == EXIT_BLOCK_PTR)
	/* Do not clear the aux field for blocks which are predecessors of
	   the EXIT block.  That way we never add them to the worklist
	   again.  Nothing is anticipatable at function exit, so ANTOUT
	   of such a block is empty.  */
	sbitmap_zero (antout[bb]);
      else
	{
	  /* Clear the aux field of this block so that it can be added to
	     the worklist again if necessary.  */
	  b->aux = NULL;
	  /* ANTOUT is the intersection of ANTIN over all successors.  */
	  sbitmap_intersection_of_succs (antout[bb], antin, bb);
	}

      /* ANTIN = ANTLOC | (TRANSP & ANTOUT); the call returns nonzero
	 when ANTIN actually changed.  */
      if (sbitmap_a_or_b_and_c (antin[bb], antloc[bb], transp[bb], antout[bb]))
	/* If the in state of this block changed, then we need
	   to add the predecessors of this block to the worklist
	   if they are not already on the worklist.  */
	for (e = b->pred; e; e = e->pred_next)
	  if (!e->src->aux && e->src != ENTRY_BLOCK_PTR)
	    {
	      *qin++ = e->src;
	      e->src->aux = e;
	      qlen++;
	      if (qin >= qend)
		qin = worklist;
	    }
    }

  free (worklist);
}
182 | ||
a42cd965 | 183 | /* Compute the earliest vector for edge based lcm. */ |
f4e72d6e | 184 | |
d2ecda27 | 185 | static void |
a42cd965 AM |
186 | compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest) |
187 | struct edge_list *edge_list; | |
d2ecda27 | 188 | int n_exprs; |
a42cd965 | 189 | sbitmap *antin, *antout, *avout, *kill, *earliest; |
d2ecda27 | 190 | { |
a42cd965 AM |
191 | sbitmap difference, temp_bitmap; |
192 | int x, num_edges; | |
193 | basic_block pred, succ; | |
d2ecda27 | 194 | |
a42cd965 | 195 | num_edges = NUM_EDGES (edge_list); |
d2ecda27 | 196 | |
a42cd965 AM |
197 | difference = sbitmap_alloc (n_exprs); |
198 | temp_bitmap = sbitmap_alloc (n_exprs); | |
d2ecda27 | 199 | |
a42cd965 | 200 | for (x = 0; x < num_edges; x++) |
d2ecda27 | 201 | { |
a42cd965 AM |
202 | pred = INDEX_EDGE_PRED_BB (edge_list, x); |
203 | succ = INDEX_EDGE_SUCC_BB (edge_list, x); | |
204 | if (pred == ENTRY_BLOCK_PTR) | |
205 | sbitmap_copy (earliest[x], antin[succ->index]); | |
206 | else | |
207 | { | |
208 | if (succ == EXIT_BLOCK_PTR) | |
f4e72d6e | 209 | sbitmap_zero (earliest[x]); |
a42cd965 | 210 | else |
d2ecda27 | 211 | { |
a42cd965 AM |
212 | sbitmap_difference (difference, antin[succ->index], |
213 | avout[pred->index]); | |
214 | sbitmap_not (temp_bitmap, antout[pred->index]); | |
f4e72d6e RK |
215 | sbitmap_a_and_b_or_c (earliest[x], difference, |
216 | kill[pred->index], temp_bitmap); | |
d2ecda27 JL |
217 | } |
218 | } | |
d2ecda27 | 219 | } |
f4e72d6e | 220 | |
d2ecda27 | 221 | free (temp_bitmap); |
a42cd965 | 222 | free (difference); |
d2ecda27 JL |
223 | } |
224 | ||
bd0eaec2 JL |
225 | /* later(p,s) is dependent on the calculation of laterin(p). |
226 | laterin(p) is dependent on the calculation of later(p2,p). | |
227 | ||
228 | laterin(ENTRY) is defined as all 0's | |
229 | later(ENTRY, succs(ENTRY)) are defined using laterin(ENTRY) | |
230 | laterin(succs(ENTRY)) is defined by later(ENTRY, succs(ENTRY)). | |
231 | ||
232 | If we progress in this manner, starting with all basic blocks | |
233 | in the work list, anytime we change later(bb), we need to add | |
234 | succs(bb) to the worklist if they are not already on the worklist. | |
235 | ||
236 | Boundary conditions: | |
237 | ||
238 | We prime the worklist all the normal basic blocks. The ENTRY block can | |
239 | never be added to the worklist since it is never the successor of any | |
240 | block. We explicitly prevent the EXIT block from being added to the | |
241 | worklist. | |
242 | ||
243 | We optimistically initialize LATER. That is the only time this routine | |
244 | will compute LATER for an edge out of the entry block since the entry | |
245 | block is never on the worklist. Thus, LATERIN is neither used nor | |
246 | computed for the ENTRY block. | |
247 | ||
248 | Since the EXIT block is never added to the worklist, we will neither | |
249 | use nor compute LATERIN for the exit block. Edges which reach the | |
250 | EXIT block are handled in the normal fashion inside the loop. However, | |
251 | the insertion/deletion computation needs LATERIN(EXIT), so we have | |
252 | to compute it. */ | |
253 | ||
static void
compute_laterin (edge_list, earliest, antloc, later, laterin)
     struct edge_list *edge_list;
     sbitmap *earliest, *antloc, *later, *laterin;
{
  int bb, num_edges, i;
  edge e;
  /* Circular worklist queue; see compute_antinout_edge for the scheme.  */
  basic_block *worklist, *qin, *qout, *qend;
  unsigned int qlen;

  num_edges = NUM_EDGES (edge_list);

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist
    = (basic_block *) xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));

  /* Initialize a mapping from each edge to its index, stashed in the
     edge's AUX field so LATER/EARLIEST can be indexed from an edge.  */
  for (i = 0; i < num_edges; i++)
    INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;

  /* We want a maximal solution, so initially consider LATER true for
     all edges.  This allows propagation through a loop since the incoming
     loop edge will have LATER set, so if all the other incoming edges
     to the loop are set, then LATERIN will be set for the head of the
     loop.

     If the optimistic setting of LATER on that edge was incorrect (for
     example the expression is ANTLOC in a block within the loop) then
     this algorithm will detect it when we process the block at the head
     of the optimistic edge.  That will requeue the affected blocks.  */
  sbitmap_vector_ones (later, num_edges);

  /* Note that even though we want an optimistic setting of LATER, we
     do not want to be overly optimistic.  Consider an outgoing edge from
     the entry block.  That edge should always have a LATER value the
     same as EARLIEST for that edge.  */
  for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
    sbitmap_copy (later[(size_t) e->aux], earliest[(size_t) e->aux]);

  /* Add all the blocks to the worklist.  This prevents an early exit from
     the loop given our optimistic initialization of LATER above.  */
  for (bb = 0; bb < n_basic_blocks; bb++)
    {
      basic_block b = BASIC_BLOCK (bb);
      *qin++ = b;
      b->aux = b;
    }
  qin = worklist;
  /* Note that we do not use the last allocated element for our queue,
     as EXIT_BLOCK is never inserted into it.  In fact the above allocation
     of n_basic_blocks + 1 elements is not necessary.  */
  qend = &worklist[n_basic_blocks];
  qlen = n_basic_blocks;

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      basic_block b = *qout++;
      b->aux = NULL;
      qlen--;
      if (qout >= qend)
	qout = worklist;

      /* Compute the intersection of LATERIN for each incoming edge to B.  */
      bb = b->index;
      sbitmap_ones (laterin[bb]);
      for (e = b->pred; e != NULL; e = e->pred_next)
	sbitmap_a_and_b (laterin[bb], laterin[bb], later[(size_t)e->aux]);

      /* Calculate LATER for all outgoing edges:
	 LATER = EARLIEST | (LATERIN & ~ANTLOC).  */
      for (e = b->succ; e != NULL; e = e->succ_next)
	if (sbitmap_union_of_diff (later[(size_t) e->aux],
				   earliest[(size_t) e->aux],
				   laterin[e->src->index],
				   antloc[e->src->index])
	    /* If LATER for an outgoing edge was changed, then we need
	       to add the target of the outgoing edge to the worklist.  */
	    && e->dest != EXIT_BLOCK_PTR && e->dest->aux == 0)
	  {
	    *qin++ = e->dest;
	    e->dest->aux = e;
	    qlen++;
	    if (qin >= qend)
	      qin = worklist;
	  }
    }

  /* Computation of insertion and deletion points requires computing LATERIN
     for the EXIT block.  We allocated an extra entry in the LATERIN array
     for just this purpose.  */
  sbitmap_ones (laterin[n_basic_blocks]);
  for (e = EXIT_BLOCK_PTR->pred; e != NULL; e = e->pred_next)
    sbitmap_a_and_b (laterin[n_basic_blocks],
		     laterin[n_basic_blocks],
		     later[(size_t) e->aux]);

  free (worklist);
}
355 | ||
a42cd965 | 356 | /* Compute the insertion and deletion points for edge based LCM. */ |
f4e72d6e | 357 | |
a42cd965 AM |
358 | static void |
359 | compute_insert_delete (edge_list, antloc, later, laterin, | |
360 | insert, delete) | |
361 | struct edge_list *edge_list; | |
362 | sbitmap *antloc, *later, *laterin, *insert, *delete; | |
363 | { | |
364 | int x; | |
d2ecda27 | 365 | |
a42cd965 AM |
366 | for (x = 0; x < n_basic_blocks; x++) |
367 | sbitmap_difference (delete[x], antloc[x], laterin[x]); | |
368 | ||
369 | for (x = 0; x < NUM_EDGES (edge_list); x++) | |
370 | { | |
371 | basic_block b = INDEX_EDGE_SUCC_BB (edge_list, x); | |
f4e72d6e | 372 | |
a42cd965 AM |
373 | if (b == EXIT_BLOCK_PTR) |
374 | sbitmap_difference (insert[x], later[x], laterin[n_basic_blocks]); | |
375 | else | |
376 | sbitmap_difference (insert[x], later[x], laterin[b->index]); | |
377 | } | |
378 | } | |
d2ecda27 | 379 | |
f4e72d6e RK |
380 | /* Given local properties TRANSP, ANTLOC, AVOUT, KILL return the insert and |
381 | delete vectors for edge based LCM. Returns an edgelist which is used to | |
382 | map the insert vector to what edge an expression should be inserted on. */ | |
d2ecda27 | 383 | |
a42cd965 AM |
struct edge_list *
pre_edge_lcm (file, n_exprs, transp, avloc, antloc, kill, insert, delete)
     FILE *file ATTRIBUTE_UNUSED;
     int n_exprs;
     sbitmap *transp;
     sbitmap *avloc;
     sbitmap *antloc;
     sbitmap *kill;
     sbitmap **insert;
     sbitmap **delete;
{
  sbitmap *antin, *antout, *earliest;
  sbitmap *avin, *avout;
  sbitmap *later, *laterin;
  struct edge_list *edge_list;
  int num_edges;

  /* Build the edge list; the returned INSERT vector is indexed by
     these edges, so the caller needs this list to interpret it.  */
  edge_list = create_edge_list ();
  num_edges = NUM_EDGES (edge_list);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      fprintf (file, "Edge List:\n");
      verify_edge_list (file, edge_list);
      print_edge_list (file, edge_list);
      dump_sbitmap_vector (file, "transp", "", transp, n_basic_blocks);
      dump_sbitmap_vector (file, "antloc", "", antloc, n_basic_blocks);
      dump_sbitmap_vector (file, "avloc", "", avloc, n_basic_blocks);
      dump_sbitmap_vector (file, "kill", "", kill, n_basic_blocks);
    }
#endif

  /* Compute global availability.  AVIN is only needed while solving
     for AVOUT and can be released immediately afterwards.  */
  avin = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
  avout = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
  compute_available (avloc, kill, avout, avin);
  free (avin);

  /* Compute global anticipatability.  */
  antin = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
  antout = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
  compute_antinout_edge (antloc, transp, antin, antout);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "antin", "", antin, n_basic_blocks);
      dump_sbitmap_vector (file, "antout", "", antout, n_basic_blocks);
    }
#endif

  /* Compute earliestness.  */
  earliest = sbitmap_vector_alloc (num_edges, n_exprs);
  compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest);

#ifdef LCM_DEBUG_INFO
  if (file)
    dump_sbitmap_vector (file, "earliest", "", earliest, num_edges);
#endif

  /* ANTIN/ANTOUT/AVOUT are not needed past this point.  */
  free (antout);
  free (antin);
  free (avout);

  later = sbitmap_vector_alloc (num_edges, n_exprs);

  /* Allocate an extra element for the exit block in the laterin vector.  */
  laterin = sbitmap_vector_alloc (n_basic_blocks + 1, n_exprs);
  compute_laterin (edge_list, earliest, antloc, later, laterin);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "laterin", "", laterin, n_basic_blocks + 1);
      dump_sbitmap_vector (file, "later", "", later, num_edges);
    }
#endif

  free (earliest);

  /* Ownership of the INSERT and DELETE vectors passes to the caller.  */
  *insert = sbitmap_vector_alloc (num_edges, n_exprs);
  *delete = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
  compute_insert_delete (edge_list, antloc, later, laterin, *insert, *delete);

  free (laterin);
  free (later);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
      dump_sbitmap_vector (file, "pre_delete_map", "", *delete,
			   n_basic_blocks);
    }
#endif

  return edge_list;
}
d2ecda27 | 483 | |
a42cd965 AM |
484 | /* Compute the AVIN and AVOUT vectors from the AVLOC and KILL vectors. |
485 | Iterates a worklist algorithm to a fixed point; returns nothing. */ |
f4e72d6e | 486 | |
void
compute_available (avloc, kill, avout, avin)
     sbitmap *avloc, *kill, *avout, *avin;
{
  int bb;
  edge e;
  /* Circular worklist queue; mirrors the scheme in compute_antinout_edge,
     but propagating forward through the flow graph.  */
  basic_block *worklist, *qin, *qout, *qend;
  unsigned int qlen;

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist
    = (basic_block *) xmalloc (sizeof (basic_block) * n_basic_blocks);

  /* We want a maximal solution.  */
  sbitmap_vector_ones (avout, n_basic_blocks);

  /* Put every block on the worklist; this is necessary because of the
     optimistic initialization of AVOUT above.  A non-NULL AUX field marks
     a block as being on the worklist.  */
  for (bb = 0; bb < n_basic_blocks; bb++)
    {
      *qin++ = BASIC_BLOCK (bb);
      BASIC_BLOCK (bb)->aux = BASIC_BLOCK (bb);
    }

  /* Rewind QIN; the queue is now full, so enqueuing wraps from here.  */
  qin = worklist;
  qend = &worklist[n_basic_blocks];
  qlen = n_basic_blocks;

  /* Mark blocks which are successors of the entry block so that we
     can easily identify them below.  */
  for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
    e->dest->aux = ENTRY_BLOCK_PTR;

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      basic_block b = *qout++;
      bb = b->index;
      qlen--;

      if (qout >= qend)
	qout = worklist;

      /* If one of the predecessor blocks is the ENTRY block, then the
	 intersection of avouts is the null set.  We can identify such blocks
	 by the special value in the AUX field in the block structure.  */
      if (b->aux == ENTRY_BLOCK_PTR)
	/* Do not clear the aux field for blocks which are successors of the
	   ENTRY block.  That way we never add them to the worklist again.  */
	sbitmap_zero (avin[bb]);
      else
	{
	  /* Clear the aux field of this block so that it can be added to
	     the worklist again if necessary.  */
	  b->aux = NULL;
	  /* AVIN is the intersection of AVOUT over all predecessors.  */
	  sbitmap_intersection_of_preds (avin[bb], avout, bb);
	}

      /* AVOUT = AVLOC | (AVIN & ~KILL); the call returns nonzero when
	 AVOUT actually changed.  */
      if (sbitmap_union_of_diff (avout[bb], avloc[bb], avin[bb], kill[bb]))
	/* If the out state of this block changed, then we need
	   to add the successors of this block to the worklist
	   if they are not already on the worklist.  */
	for (e = b->succ; e; e = e->succ_next)
	  if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR)
	    {
	      *qin++ = e->dest;
	      e->dest->aux = e;
	      qlen++;

	      if (qin >= qend)
		qin = worklist;
	    }
    }

  free (worklist);
}
566 | ||
a42cd965 | 567 | /* Compute the farthest vector for edge based lcm. */ |
f4e72d6e | 568 | |
d2ecda27 | 569 | static void |
a42cd965 AM |
570 | compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin, |
571 | kill, farthest) | |
572 | struct edge_list *edge_list; | |
d2ecda27 | 573 | int n_exprs; |
a42cd965 | 574 | sbitmap *st_avout, *st_avin, *st_antin, *kill, *farthest; |
d2ecda27 | 575 | { |
a42cd965 AM |
576 | sbitmap difference, temp_bitmap; |
577 | int x, num_edges; | |
578 | basic_block pred, succ; | |
d2ecda27 | 579 | |
a42cd965 | 580 | num_edges = NUM_EDGES (edge_list); |
d2ecda27 | 581 | |
a42cd965 AM |
582 | difference = sbitmap_alloc (n_exprs); |
583 | temp_bitmap = sbitmap_alloc (n_exprs); | |
d2ecda27 | 584 | |
a42cd965 | 585 | for (x = 0; x < num_edges; x++) |
d2ecda27 | 586 | { |
a42cd965 AM |
587 | pred = INDEX_EDGE_PRED_BB (edge_list, x); |
588 | succ = INDEX_EDGE_SUCC_BB (edge_list, x); | |
589 | if (succ == EXIT_BLOCK_PTR) | |
590 | sbitmap_copy (farthest[x], st_avout[pred->index]); | |
591 | else | |
d2ecda27 | 592 | { |
a42cd965 | 593 | if (pred == ENTRY_BLOCK_PTR) |
f4e72d6e | 594 | sbitmap_zero (farthest[x]); |
a42cd965 AM |
595 | else |
596 | { | |
597 | sbitmap_difference (difference, st_avout[pred->index], | |
598 | st_antin[succ->index]); | |
599 | sbitmap_not (temp_bitmap, st_avin[succ->index]); | |
600 | sbitmap_a_and_b_or_c (farthest[x], difference, | |
601 | kill[succ->index], temp_bitmap); | |
602 | } | |
d2ecda27 | 603 | } |
d2ecda27 | 604 | } |
f4e72d6e | 605 | |
d2ecda27 | 606 | free (temp_bitmap); |
a42cd965 | 607 | free (difference); |
d2ecda27 JL |
608 | } |
609 | ||
bd0eaec2 JL |
610 | /* Compute nearer and nearerout vectors for edge based lcm. |
611 | ||
612 | This is the mirror of compute_laterin, additional comments on the | |
613 | implementation can be found before compute_laterin. */ | |
614 | ||
d2ecda27 | 615 | static void |
bd0eaec2 | 616 | compute_nearerout (edge_list, farthest, st_avloc, nearer, nearerout) |
a42cd965 | 617 | struct edge_list *edge_list; |
a42cd965 | 618 | sbitmap *farthest, *st_avloc, *nearer, *nearerout; |
d2ecda27 | 619 | { |
bd0eaec2 JL |
620 | int bb, num_edges, i; |
621 | edge e; | |
622 | basic_block *worklist, *tos; | |
d2ecda27 | 623 | |
a42cd965 | 624 | num_edges = NUM_EDGES (edge_list); |
d2ecda27 | 625 | |
bd0eaec2 JL |
626 | /* Allocate a worklist array/queue. Entries are only added to the |
627 | list if they were not already on the list. So the size is | |
628 | bounded by the number of basic blocks. */ | |
f4e72d6e RK |
629 | tos = worklist |
630 | = (basic_block *) xmalloc (sizeof (basic_block) * (n_basic_blocks + 1)); | |
d2ecda27 | 631 | |
bd0eaec2 JL |
632 | /* Initialize NEARER for each edge and build a mapping from an edge to |
633 | its index. */ | |
634 | for (i = 0; i < num_edges; i++) | |
63408827 | 635 | INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i; |
a42cd965 | 636 | |
bd0eaec2 JL |
637 | /* We want a maximal solution. */ |
638 | sbitmap_vector_ones (nearer, num_edges); | |
639 | ||
89e606c9 JL |
640 | /* Note that even though we want an optimistic setting of NEARER, we |
641 | do not want to be overly optimistic. Consider an incoming edge to | |
642 | the exit block. That edge should always have a NEARER value the | |
643 | same as FARTHEST for that edge. */ | |
644 | for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next) | |
e5b7ca32 | 645 | sbitmap_copy (nearer[(size_t)e->aux], farthest[(size_t)e->aux]); |
89e606c9 | 646 | |
bd0eaec2 JL |
647 | /* Add all the blocks to the worklist. This prevents an early exit |
648 | from the loop given our optimistic initialization of NEARER. */ | |
649 | for (bb = 0; bb < n_basic_blocks; bb++) | |
d2ecda27 | 650 | { |
bd0eaec2 JL |
651 | basic_block b = BASIC_BLOCK (bb); |
652 | *tos++ = b; | |
653 | b->aux = b; | |
a42cd965 | 654 | } |
bd0eaec2 JL |
655 | |
656 | /* Iterate until the worklist is empty. */ | |
657 | while (tos != worklist) | |
a42cd965 | 658 | { |
bd0eaec2 JL |
659 | /* Take the first entry off the worklist. */ |
660 | basic_block b = *--tos; | |
661 | b->aux = NULL; | |
662 | ||
663 | /* Compute the intersection of NEARER for each outgoing edge from B. */ | |
664 | bb = b->index; | |
665 | sbitmap_ones (nearerout[bb]); | |
666 | for (e = b->succ; e != NULL; e = e->succ_next) | |
63408827 RH |
667 | sbitmap_a_and_b (nearerout[bb], nearerout[bb], |
668 | nearer[(size_t) e->aux]); | |
bd0eaec2 JL |
669 | |
670 | /* Calculate NEARER for all incoming edges. */ | |
671 | for (e = b->pred; e != NULL; e = e->pred_next) | |
f4e72d6e RK |
672 | if (sbitmap_union_of_diff (nearer[(size_t) e->aux], |
673 | farthest[(size_t) e->aux], | |
674 | nearerout[e->dest->index], | |
675 | st_avloc[e->dest->index]) | |
676 | /* If NEARER for an incoming edge was changed, then we need | |
677 | to add the source of the incoming edge to the worklist. */ | |
678 | && e->src != ENTRY_BLOCK_PTR && e->src->aux == 0) | |
679 | { | |
680 | *tos++ = e->src; | |
681 | e->src->aux = e; | |
682 | } | |
a42cd965 | 683 | } |
d2ecda27 | 684 | |
bd0eaec2 JL |
685 | /* Computation of insertion and deletion points requires computing NEAREROUT |
686 | for the ENTRY block. We allocated an extra entry in the NEAREROUT array | |
687 | for just this purpose. */ | |
688 | sbitmap_ones (nearerout[n_basic_blocks]); | |
689 | for (e = ENTRY_BLOCK_PTR->succ; e != NULL; e = e->succ_next) | |
690 | sbitmap_a_and_b (nearerout[n_basic_blocks], | |
691 | nearerout[n_basic_blocks], | |
63408827 | 692 | nearer[(size_t) e->aux]); |
bd0eaec2 JL |
693 | |
694 | free (tos); | |
a42cd965 | 695 | } |
d2ecda27 | 696 | |
a42cd965 | 697 | /* Compute the insertion and deletion points for edge based LCM. */ |
f4e72d6e | 698 | |
d2ecda27 | 699 | static void |
a42cd965 AM |
700 | compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout, |
701 | insert, delete) | |
702 | struct edge_list *edge_list; | |
703 | sbitmap *st_avloc, *nearer, *nearerout, *insert, *delete; | |
d2ecda27 | 704 | { |
a42cd965 | 705 | int x; |
d2ecda27 | 706 | |
a42cd965 AM |
707 | for (x = 0; x < n_basic_blocks; x++) |
708 | sbitmap_difference (delete[x], st_avloc[x], nearerout[x]); | |
709 | ||
710 | for (x = 0; x < NUM_EDGES (edge_list); x++) | |
d2ecda27 | 711 | { |
a42cd965 AM |
712 | basic_block b = INDEX_EDGE_PRED_BB (edge_list, x); |
713 | if (b == ENTRY_BLOCK_PTR) | |
714 | sbitmap_difference (insert[x], nearer[x], nearerout[n_basic_blocks]); | |
d2ecda27 | 715 | else |
a42cd965 | 716 | sbitmap_difference (insert[x], nearer[x], nearerout[b->index]); |
d2ecda27 | 717 | } |
d2ecda27 JL |
718 | } |
719 | ||
a42cd965 AM |
720 | /* Given local properties TRANSP, ST_AVLOC, ST_ANTLOC, KILL return the |
721 | insert and delete vectors for edge based reverse LCM. Returns an | |
722 | edgelist which is used to map the insert vector to what edge | |
723 | an expression should be inserted on. */ | |
d2ecda27 | 724 | |
a42cd965 AM |
725 | struct edge_list * |
726 | pre_edge_rev_lcm (file, n_exprs, transp, st_avloc, st_antloc, kill, | |
727 | insert, delete) | |
4b66e1c0 | 728 | FILE *file ATTRIBUTE_UNUSED; |
a42cd965 AM |
729 | int n_exprs; |
730 | sbitmap *transp; | |
731 | sbitmap *st_avloc; | |
732 | sbitmap *st_antloc; | |
733 | sbitmap *kill; | |
734 | sbitmap **insert; | |
735 | sbitmap **delete; | |
d2ecda27 | 736 | { |
a42cd965 AM |
737 | sbitmap *st_antin, *st_antout; |
738 | sbitmap *st_avout, *st_avin, *farthest; | |
739 | sbitmap *nearer, *nearerout; | |
740 | struct edge_list *edge_list; | |
4b66e1c0 | 741 | int num_edges; |
a42cd965 AM |
742 | |
743 | edge_list = create_edge_list (); | |
744 | num_edges = NUM_EDGES (edge_list); | |
745 | ||
746 | st_antin = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, n_exprs); | |
747 | st_antout = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, n_exprs); | |
748 | sbitmap_vector_zero (st_antin, n_basic_blocks); | |
749 | sbitmap_vector_zero (st_antout, n_basic_blocks); | |
750 | compute_antinout_edge (st_antloc, transp, st_antin, st_antout); | |
751 | ||
752 | /* Compute global anticipatability. */ | |
753 | st_avout = sbitmap_vector_alloc (n_basic_blocks, n_exprs); | |
754 | st_avin = sbitmap_vector_alloc (n_basic_blocks, n_exprs); | |
755 | compute_available (st_avloc, kill, st_avout, st_avin); | |
756 | ||
757 | #ifdef LCM_DEBUG_INFO | |
758 | if (file) | |
759 | { | |
760 | fprintf (file, "Edge List:\n"); | |
761 | verify_edge_list (file, edge_list); | |
762 | print_edge_list (file, edge_list); | |
763 | dump_sbitmap_vector (file, "transp", "", transp, n_basic_blocks); | |
764 | dump_sbitmap_vector (file, "st_avloc", "", st_avloc, n_basic_blocks); | |
765 | dump_sbitmap_vector (file, "st_antloc", "", st_antloc, n_basic_blocks); | |
766 | dump_sbitmap_vector (file, "st_antin", "", st_antin, n_basic_blocks); | |
767 | dump_sbitmap_vector (file, "st_antout", "", st_antout, n_basic_blocks); | |
768 | dump_sbitmap_vector (file, "st_kill", "", kill, n_basic_blocks); | |
769 | } | |
770 | #endif | |
d2ecda27 | 771 | |
a42cd965 AM |
772 | #ifdef LCM_DEBUG_INFO |
773 | if (file) | |
774 | { | |
775 | dump_sbitmap_vector (file, "st_avout", "", st_avout, n_basic_blocks); | |
776 | dump_sbitmap_vector (file, "st_avin", "", st_avin, n_basic_blocks); | |
777 | } | |
778 | #endif | |
d2ecda27 | 779 | |
a42cd965 AM |
780 | /* Compute farthestness. */ |
781 | farthest = sbitmap_vector_alloc (num_edges, n_exprs); | |
782 | compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin, | |
783 | kill, farthest); | |
784 | ||
785 | #ifdef LCM_DEBUG_INFO | |
786 | if (file) | |
787 | dump_sbitmap_vector (file, "farthest", "", farthest, num_edges); | |
788 | #endif | |
789 | ||
790 | free (st_avin); | |
791 | free (st_avout); | |
792 | ||
793 | nearer = sbitmap_vector_alloc (num_edges, n_exprs); | |
f4e72d6e | 794 | |
a42cd965 AM |
795 | /* Allocate an extra element for the entry block. */ |
796 | nearerout = sbitmap_vector_alloc (n_basic_blocks + 1, n_exprs); | |
bd0eaec2 | 797 | compute_nearerout (edge_list, farthest, st_avloc, nearer, nearerout); |
a42cd965 AM |
798 | |
799 | #ifdef LCM_DEBUG_INFO | |
800 | if (file) | |
d2ecda27 | 801 | { |
a42cd965 AM |
802 | dump_sbitmap_vector (file, "nearerout", "", nearerout, |
803 | n_basic_blocks + 1); | |
804 | dump_sbitmap_vector (file, "nearer", "", nearer, num_edges); | |
d2ecda27 | 805 | } |
a42cd965 AM |
806 | #endif |
807 | ||
808 | free (farthest); | |
809 | ||
810 | *insert = sbitmap_vector_alloc (num_edges, n_exprs); | |
811 | *delete = sbitmap_vector_alloc (n_basic_blocks, n_exprs); | |
f4e72d6e RK |
812 | compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout, |
813 | *insert, *delete); | |
a42cd965 AM |
814 | |
815 | free (nearerout); | |
816 | free (nearer); | |
817 | ||
818 | #ifdef LCM_DEBUG_INFO | |
819 | if (file) | |
820 | { | |
821 | dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges); | |
f4e72d6e RK |
822 | dump_sbitmap_vector (file, "pre_delete_map", "", *delete, |
823 | n_basic_blocks); | |
a42cd965 AM |
824 | } |
825 | #endif | |
826 | ||
827 | return edge_list; | |
d2ecda27 | 828 | } |
9f09b1f2 | 829 | |
f4e72d6e RK |
830 | /* Mode switching: |
831 | ||
832 | The algorithm for setting the modes consists of scanning the insn list | |
9f09b1f2 R |
833 | and finding all the insns which require a specific mode. Each insn gets |
834 | a unique struct seginfo element. These structures are inserted into a list | |
835 | for each basic block. For each entity, there is an array of bb_info over | |
836 | the flow graph basic blocks (local var 'bb_info'), and contains a list | |
837 | of all insns within that basic block, in the order they are encountered. | |
838 | ||
   For each entity, any basic block WITHOUT any insns requiring a specific
   mode is given a single entry, without a mode.  (Each basic block
   in the flow graph must have at least one entry in the segment table.)
842 | ||
   The LCM algorithm is then run over the flow graph to determine where to
   place the sets to the highest-priority mode with respect to the first
   insn in any one block.  Any adjustments required to the transparency
   vectors are made, then the next iteration starts for the next-lower
   priority mode, till for each entity all modes are exhausted.
848 | ||
849 | More details are located in the code for optimize_mode_switching(). */ | |
850 | ||
/* This structure contains the information for each insn which requires
   either single or double mode to be set.
   MODE is the mode this insn must be executed in.
   INSN_PTR is the insn to be executed (may be the note that marks the
   beginning of a basic block).
   BBNUM is the flow graph basic block this insn occurs in.
   NEXT is the next insn in the same basic block.  */
struct seginfo
{
  int mode;			/* Mode required from this point on.  */
  rtx insn_ptr;			/* Mode set is emitted before this insn,
				   or after it if it is the basic block
				   note.  */
  int bbnum;			/* Index of the containing basic block.  */
  struct seginfo *next;		/* Next segment in the same basic block.  */
  HARD_REG_SET regs_live;	/* Hard regs live at this point; passed to
				   the target when emitting the mode set.  */
};
866 | ||
/* Per-basic-block information for one mode-switching entity.  */
struct bb_info
{
  struct seginfo *seginfo;	/* Head of the segment list for this block.  */
  int computing;		/* Last mode set in this block, or the
				   entity's no-mode value if none.  */
};
872 | ||
/* These bitmaps are used for the LCM algorithm.  Each is indexed by
   basic block, with one bit per mode-switching entity.  */

#ifdef OPTIMIZE_MODE_SWITCHING
static sbitmap *antic;		/* Blocks whose first segment wants the
				   current mode.  */
static sbitmap *transp;		/* Blocks transparent for an entity.  */
static sbitmap *comp;		/* Blocks computing the current mode.  */
static sbitmap *delete;		/* Per-block mode sets LCM found redundant.  */
static sbitmap *insert;		/* Per-edge insertion points from LCM.  */

static struct seginfo * new_seginfo PARAMS ((int, rtx, int, HARD_REG_SET));
static void add_seginfo PARAMS ((struct bb_info *, struct seginfo *));
static void reg_dies PARAMS ((rtx, HARD_REG_SET));
static void reg_becomes_live PARAMS ((rtx, rtx, void *));
static void make_preds_opaque PARAMS ((basic_block, int));
#endif
888 | \f | |
889 | #ifdef OPTIMIZE_MODE_SWITCHING | |
9f09b1f2 R |
890 | |
891 | /* This function will allocate a new BBINFO structure, initialized | |
1270c255 | 892 | with the MODE, INSN, and basic block BB parameters. */ |
c8d8ed65 | 893 | |
9f09b1f2 R |
894 | static struct seginfo * |
895 | new_seginfo (mode, insn, bb, regs_live) | |
896 | int mode; | |
897 | rtx insn; | |
898 | int bb; | |
899 | HARD_REG_SET regs_live; | |
900 | { | |
901 | struct seginfo *ptr; | |
902 | ptr = xmalloc (sizeof (struct seginfo)); | |
903 | ptr->mode = mode; | |
904 | ptr->insn_ptr = insn; | |
905 | ptr->bbnum = bb; | |
906 | ptr->next = NULL; | |
907 | COPY_HARD_REG_SET (ptr->regs_live, regs_live); | |
908 | return ptr; | |
909 | } | |
910 | ||
911 | /* Add a seginfo element to the end of a list. | |
912 | HEAD is a pointer to the list beginning. | |
913 | INFO is the structure to be linked in. */ | |
c8d8ed65 | 914 | |
9f09b1f2 R |
915 | static void |
916 | add_seginfo (head, info) | |
917 | struct bb_info *head; | |
918 | struct seginfo *info; | |
919 | { | |
920 | struct seginfo *ptr; | |
921 | ||
922 | if (head->seginfo == NULL) | |
923 | head->seginfo = info; | |
924 | else | |
925 | { | |
926 | ptr = head->seginfo; | |
927 | while (ptr->next != NULL) | |
928 | ptr = ptr->next; | |
929 | ptr->next = info; | |
930 | } | |
931 | } | |
932 | ||
933 | /* Make all predecessors of basic block B opaque, recursively, till we hit | |
934 | some that are already non-transparent, or an edge where aux is set; that | |
935 | denotes that a mode set is to be done on that edge. | |
936 | J is the bit number in the bitmaps that corresponds to the entity that | |
937 | we are currently handling mode-switching for. */ | |
c8d8ed65 | 938 | |
9f09b1f2 R |
939 | static void |
940 | make_preds_opaque (b, j) | |
941 | basic_block b; | |
942 | int j; | |
943 | { | |
944 | edge e; | |
945 | ||
946 | for (e = b->pred; e; e = e->pred_next) | |
947 | { | |
948 | basic_block pb = e->src; | |
f4e72d6e | 949 | |
9f09b1f2 R |
950 | if (e->aux || ! TEST_BIT (transp[pb->index], j)) |
951 | continue; | |
f4e72d6e | 952 | |
9f09b1f2 R |
953 | RESET_BIT (transp[pb->index], j); |
954 | make_preds_opaque (pb, j); | |
955 | } | |
956 | } | |
957 | ||
958 | /* Record in LIVE that register REG died. */ | |
c8d8ed65 | 959 | |
9f09b1f2 R |
960 | static void |
961 | reg_dies (reg, live) | |
962 | rtx reg; | |
963 | HARD_REG_SET live; | |
964 | { | |
f4e72d6e | 965 | int regno, nregs; |
9f09b1f2 R |
966 | |
967 | if (GET_CODE (reg) != REG) | |
968 | return; | |
f4e72d6e | 969 | |
9f09b1f2 R |
970 | regno = REGNO (reg); |
971 | if (regno < FIRST_PSEUDO_REGISTER) | |
f4e72d6e RK |
972 | for (nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg)) - 1; nregs >= 0; |
973 | nregs--) | |
974 | CLEAR_HARD_REG_BIT (live, regno + nregs); | |
9f09b1f2 R |
975 | } |
976 | ||
977 | /* Record in LIVE that register REG became live. | |
978 | This is called via note_stores. */ | |
c8d8ed65 | 979 | |
9f09b1f2 R |
980 | static void |
981 | reg_becomes_live (reg, setter, live) | |
982 | rtx reg; | |
983 | rtx setter ATTRIBUTE_UNUSED; | |
984 | void *live; | |
985 | { | |
f4e72d6e | 986 | int regno, nregs; |
9f09b1f2 R |
987 | |
988 | if (GET_CODE (reg) == SUBREG) | |
989 | reg = SUBREG_REG (reg); | |
990 | ||
991 | if (GET_CODE (reg) != REG) | |
992 | return; | |
993 | ||
994 | regno = REGNO (reg); | |
995 | if (regno < FIRST_PSEUDO_REGISTER) | |
f4e72d6e RK |
996 | for (nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg)) - 1; nregs >= 0; |
997 | nregs--) | |
998 | SET_HARD_REG_BIT (* (HARD_REG_SET *) live, regno + nregs); | |
9f09b1f2 R |
999 | } |
1000 | ||
/* Find all insns that need a particular mode setting, and insert the
   necessary mode switches.  Return true if we did work.  */

int
optimize_mode_switching (file)
     FILE *file;
{
  rtx insn;
  int bb, e;
  edge eg;
  int need_commit = 0;		/* Nonzero once insns are queued on edges,
				   so commit_edge_insertions must run.  */
  sbitmap *kill;
  struct edge_list *edge_list;
  /* Mode counts per entity, supplied by the target.  */
  static int num_modes[] = NUM_MODES_FOR_MODE_SWITCHING;
#define N_ENTITIES (sizeof num_modes / sizeof (int))
  int entity_map[N_ENTITIES];	/* Dense index -> entity number.  */
  /* bb_info[j][bb] holds the segment list and computed mode for entity
     entity_map[j] in basic block bb.  */
  struct bb_info *bb_info[N_ENTITIES];
  int i, j;
  int n_entities;		/* Number of entities actually optimized.  */
  int max_num_modes = 0;	/* Largest mode count, i.e. number of LCM
				   passes needed.  */

  /* Collect the entities the target asks us to optimize.  */
  for (e = N_ENTITIES - 1, n_entities = 0; e >= 0; e--)
    if (OPTIMIZE_MODE_SWITCHING (e))
      {
	/* Create the list of segments within each basic block.  */
	bb_info[n_entities]
	  = (struct bb_info *) xcalloc (n_basic_blocks, sizeof **bb_info);
	entity_map[n_entities++] = e;
	if (num_modes[e] > max_num_modes)
	  max_num_modes = num_modes[e];
      }

  if (! n_entities)
    return 0;

  /* Create the bitmap vectors.  One bit per entity, per basic block.  */

  antic = sbitmap_vector_alloc (n_basic_blocks, n_entities);
  transp = sbitmap_vector_alloc (n_basic_blocks, n_entities);
  comp = sbitmap_vector_alloc (n_basic_blocks, n_entities);

  sbitmap_vector_ones (transp, n_basic_blocks);

  for (j = n_entities - 1; j >= 0; j--)
    {
      int e = entity_map[j];
      int no_mode = num_modes[e];
      struct bb_info *info = bb_info[j];

      /* Determine what the first use (if any) need for a mode of entity E is.
	 This will be the mode that is anticipatable for this block.
	 Also compute the initial transparency settings.  */
      for (bb = 0 ; bb < n_basic_blocks; bb++)
	{
	  struct seginfo *ptr;
	  int last_mode = no_mode;
	  HARD_REG_SET live_now;

	  /* Track hard register liveness through the block so each
	     seginfo records the registers live at its insn.  */
	  REG_SET_TO_HARD_REG_SET (live_now,
				   BASIC_BLOCK (bb)->global_live_at_start);
	  for (insn = BLOCK_HEAD (bb);
	       insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
	       insn = NEXT_INSN (insn))
	    {
	      if (INSN_P (insn))
		{
		  int mode = MODE_NEEDED (e, insn);
		  rtx link;

		  /* Start a new segment whenever the required mode
		     changes.  */
		  if (mode != no_mode && mode != last_mode)
		    {
		      last_mode = mode;
		      ptr = new_seginfo (mode, insn, bb, live_now);
		      add_seginfo (info + bb, ptr);
		      RESET_BIT (transp[bb], j);
		    }

		  /* Update LIVE_NOW.  */
		  for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
		    if (REG_NOTE_KIND (link) == REG_DEAD)
		      reg_dies (XEXP (link, 0), live_now);

		  note_stores (PATTERN (insn), reg_becomes_live, &live_now);
		  for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
		    if (REG_NOTE_KIND (link) == REG_UNUSED)
		      reg_dies (XEXP (link, 0), live_now);
		}
	    }

	  /* If this is a predecessor of the exit block, and we must
	     force a mode on exit, make note of that.  */
#ifdef NORMAL_MODE
	  if (NORMAL_MODE (e) != no_mode && last_mode != NORMAL_MODE (e))
	    for (eg = BASIC_BLOCK (bb)->succ; eg; eg = eg->succ_next)
	      if (eg->dest == EXIT_BLOCK_PTR)
		{
		  rtx insn = BLOCK_END (bb);

		  /* Find the last insn before a USE and/or JUMP.  */
		  while ((GET_CODE (insn) == INSN
			  && GET_CODE (PATTERN (insn)) == USE)
			 || GET_CODE (insn) == JUMP_INSN)
		    insn = PREV_INSN (insn);
		  if (insn != BLOCK_END (bb) && NEXT_INSN (insn))
		    insn = NEXT_INSN (insn);
		  last_mode = NORMAL_MODE (e);
		  add_seginfo (info + bb,
			       new_seginfo (last_mode, insn, bb, live_now));
		  RESET_BIT (transp[bb], j);
		}
#endif

	  info[bb].computing = last_mode;
	  /* Check for blocks without ANY mode requirements.  */
	  if (last_mode == no_mode)
	    {
	      ptr = new_seginfo (no_mode, insn, bb, live_now);
	      add_seginfo (info + bb, ptr);
	    }
	}
#ifdef NORMAL_MODE
      {
	int mode = NORMAL_MODE (e);

	if (mode != no_mode)
	  {
	    /* Function entry is assumed to be in NORMAL_MODE; adjust
	       the entry block's successors accordingly.  */
	    for (eg = ENTRY_BLOCK_PTR->succ; eg; eg = eg->succ_next)
	      {
		bb = eg->dest->index;

		/* By always making this nontransparent, we save
		   an extra check in make_preds_opaque.  We also
		   need this to avoid confusing pre_edge_lcm when
		   antic is cleared but transp and comp are set.  */
		RESET_BIT (transp[bb], j);

		/* If the block already has MODE, pretend it
		   has none (because we don't need to set it),
		   but retain whatever mode it computes.  */
		if (info[bb].seginfo->mode == mode)
		  info[bb].seginfo->mode = no_mode;

		/* Insert a fake computing definition of MODE into entry
		   blocks which compute no mode.  This represents the mode on
		   entry.  */
		else if (info[bb].computing == no_mode)
		  {
		    info[bb].computing = mode;
		    info[bb].seginfo->mode = no_mode;
		  }
	      }
	  }
      }
#endif /* NORMAL_MODE */
    }

  kill = sbitmap_vector_alloc (n_basic_blocks, n_entities);
  /* Run one LCM problem per priority level, highest priority first.  */
  for (i = 0; i < max_num_modes; i++)
    {
      int current_mode[N_ENTITIES];

      /* Set the anticipatable and computing arrays.  */
      sbitmap_vector_zero (antic, n_basic_blocks);
      sbitmap_vector_zero (comp, n_basic_blocks);
      for (j = n_entities - 1; j >= 0; j--)
	{
	  int m = current_mode[j] = MODE_PRIORITY_TO_MODE (entity_map[j], i);
	  struct bb_info *info = bb_info[j];

	  for (bb = 0 ; bb < n_basic_blocks; bb++)
	    {
	      if (info[bb].seginfo->mode == m)
		SET_BIT (antic[bb], j);

	      if (info[bb].computing == m)
		SET_BIT (comp[bb], j);
	    }
	}

      /* Calculate the optimal locations for the
	 placement mode switches to modes with priority I.  */

      /* KILL is the complement of transparency.  */
      for (bb = n_basic_blocks - 1; bb >= 0; bb--)
	sbitmap_not (kill[bb], transp[bb]);
      edge_list = pre_edge_lcm (file, 1, transp, comp, antic,
				kill, &insert, &delete);

      for (j = n_entities - 1; j >= 0; j--)
	{
	  /* Insert all mode sets that have been inserted by lcm.  */
	  int no_mode = num_modes[entity_map[j]];

	  /* Wherever we have moved a mode setting upwards in the flow graph,
	     the blocks between the new setting site and the now redundant
	     computation ceases to be transparent for any lower-priority
	     mode of the same entity.  First set the aux field of each
	     insertion site edge non-transparent, then propagate the new
	     non-transparency from the redundant computation upwards till
	     we hit an insertion site or an already non-transparent block.  */
	  for (e = NUM_EDGES (edge_list) - 1; e >= 0; e--)
	    {
	      /* NOTE: shadows the outer EG.  */
	      edge eg = INDEX_EDGE (edge_list, e);
	      int mode;
	      basic_block src_bb;
	      HARD_REG_SET live_at_edge;
	      rtx mode_set;

	      eg->aux = 0;

	      if (! TEST_BIT (insert[e], j))
		continue;

	      /* Mark this edge as an insertion site; make_preds_opaque
		 stops its upward propagation here.  */
	      eg->aux = (void *)1;

	      mode = current_mode[j];
	      src_bb = eg->src;

	      REG_SET_TO_HARD_REG_SET (live_at_edge,
				       src_bb->global_live_at_end);

	      /* Let the target emit the actual mode-setting insns.  */
	      start_sequence ();
	      EMIT_MODE_SET (entity_map[j], mode, live_at_edge);
	      mode_set = gen_sequence ();
	      end_sequence ();

	      /* If this is an abnormal edge, we'll insert at the end of the
		 previous block.  */
	      if (eg->flags & EDGE_ABNORMAL)
		{
		  src_bb->end = emit_insn_after (mode_set, src_bb->end);
		  bb_info[j][src_bb->index].computing = mode;
		  RESET_BIT (transp[src_bb->index], j);
		}
	      else
		{
		  need_commit = 1;
		  insert_insn_on_edge (mode_set, eg);
		}
	    }

	  for (bb = n_basic_blocks - 1; bb >= 0; bb--)
	    if (TEST_BIT (delete[bb], j))
	      {
		make_preds_opaque (BASIC_BLOCK (bb), j);
		/* Cancel the 'deleted' mode set.  */
		bb_info[j][bb].seginfo->mode = no_mode;
	      }
	}

      free_edge_list (edge_list);
    }

  /* Now output the remaining mode sets in all the segments.  */
  for (j = n_entities - 1; j >= 0; j--)
    {
      int no_mode = num_modes[entity_map[j]];

      for (bb = n_basic_blocks - 1; bb >= 0; bb--)
	{
	  struct seginfo *ptr, *next;
	  for (ptr = bb_info[j][bb].seginfo; ptr; ptr = next)
	    {
	      next = ptr->next;
	      if (ptr->mode != no_mode)
		{
		  rtx mode_set;

		  start_sequence ();
		  EMIT_MODE_SET (entity_map[j], ptr->mode, ptr->regs_live);
		  mode_set = gen_sequence ();
		  end_sequence ();

		  /* Emit after a basic block note, otherwise before the
		     insn that requires the mode.  */
		  if (GET_CODE (ptr->insn_ptr) == NOTE
		      && (NOTE_LINE_NUMBER (ptr->insn_ptr)
			  == NOTE_INSN_BASIC_BLOCK))
		    emit_block_insn_after (mode_set, ptr->insn_ptr,
					   BASIC_BLOCK (ptr->bbnum));
		  else
		    emit_block_insn_before (mode_set, ptr->insn_ptr,
					    BASIC_BLOCK (ptr->bbnum));
		}

	      free (ptr);
	    }
	}

      free (bb_info[j]);
    }

  /* Finished. Free up all the things we've allocated.  */

  sbitmap_vector_free (kill);
  sbitmap_vector_free (antic);
  sbitmap_vector_free (transp);
  sbitmap_vector_free (comp);
  sbitmap_vector_free (delete);
  sbitmap_vector_free (insert);

  if (need_commit)
    commit_edge_insertions ();

  /* Ideally we'd figure out what blocks were affected and start from
     there, but this is enormously complicated by commit_edge_insertions,
     which would screw up any indicies we'd collected, and also need to
     be involved in the update.  Bail and recompute global life info for
     everything.  */

  allocate_reg_life_data ();
  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES,
		    (PROP_DEATH_NOTES | PROP_KILL_DEAD_CODE
		     | PROP_SCAN_DEAD_CODE | PROP_REG_INFO));

  return 1;
}
97d36f45 | 1315 | #endif /* OPTIMIZE_MODE_SWITCHING */ |