/* Global common subexpression elimination
   and global constant/copy propagation for GNU compiler.
   Copyright (C) 1997 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

/* TODO
   - reordering of memory allocation and freeing to be more space efficient
   - do a rough calc of how many regs are needed in each block, and a rough
     calc of how many regs are available in each class and use that to
     throttle back the code in cases where RTX_COST is minimal.
   - memory aliasing support
   - ability to realloc sbitmap vectors would allow one initial computation
     of reg_set_in_block with only subsequent additions, rather than
     recomputing it for each pass

   NOTES
   - the classic gcse implementation is kept in for now for comparison
*/

/* References searched while implementing this.
   This list will eventually be deleted but I wanted to have a record of it
   for now.

   Compilers: Principles, Techniques, and Tools
   Aho, Sethi, Ullman
   Addison-Wesley, 1988

   Global Optimization by Suppression of Partial Redundancies
   E. Morel, C. Renvoise
   Communications of the ACM, Vol. 22, Num. 2, Feb. 1979

   A Portable Machine-Independent Global Optimizer - Design and Measurements
   Frederick Chow
   Stanford Ph.D. thesis, Dec. 1983

   xxx
   Elimination Algorithms for Data Flow Analysis
   B.G. Ryder, M.C. Paull
   ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986

   reread
   A Fast Algorithm for Code Movement Optimization
   D.M. Dhamdhere
   SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988

   A Solution to a Problem with Morel and Renvoise's
   Global Optimization by Suppression of Partial Redundancies
   K-H Drechsler, M.P. Stadel
   ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988

   Practical Adaptation of the Global Optimization
   Algorithm of Morel and Renvoise
   D.M. Dhamdhere
   ACM TOPLAS, Vol. 13, Num. 2, Apr. 1991

   Efficiently Computing Static Single Assignment Form and the Control
   Dependence Graph
   R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
   ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991

   yyy
   How to Analyze Large Programs Efficiently and Informatively
   D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
   ACM SIGPLAN Notices, Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   Lazy Code Motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices, Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   What's In a Region? Or Computing Control Dependence Regions in Near-Linear
   Time for Reducible Flow Control
   Thomas Ball
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   An Efficient Representation for Sparse Sets
   Preston Briggs, Linda Torczon
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
   K-H Drechsler, M.P. Stadel
   ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993

   Partial Dead Code Elimination
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Effective Partial Redundancy Elimination
   P. Briggs, K.D. Cooper
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   The Program Structure Tree: Computing Control Regions in Linear Time
   R. Johnson, D. Pearson, K. Pingali
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Optimal Code Motion: Theory and Practice
   J. Knoop, O. Ruthing, B. Steffen
   ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994

   The Power of Assignment Motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices, Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Global Code Motion / Global Value Numbering
   C. Click
   ACM SIGPLAN Notices, Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Value Driven Redundancy Elimination
   L.T. Simpson
   Rice University Ph.D. thesis, Apr. 1996

   Value Numbering
   L.T. Simpson
   Massively Scalar Compiler Project, Rice University, Sep. 1996

   High Performance Compilers for Parallel Computing
   Michael Wolfe
   Addison-Wesley, 1996

   People wishing to speed up the code here should read the papers marked
   xxx and yyy above.
   People wishing to do something different can find various possibilities
   in the above papers and elsewhere.
*/

#include "config.h"
/* Must precede rtl.h for FFS.  */
#include "system.h"

#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "flags.h"
#include "real.h"
#include "insn-config.h"
#include "recog.h"
#include "basic-block.h"
#include "output.h"

#include "obstack.h"
#define obstack_chunk_alloc gmalloc
#define obstack_chunk_free free

/* Maximum number of passes to perform.  */
#define MAX_PASSES 1

/* Propagate flow information through back edges and thus enable PRE's
   moving loop invariant calculations out of loops.

   Originally this tended to create worse overall code, but several
   improvements during the development of PRE seem to have made following
   back edges generally a win.

   Note that much of the loop invariant code motion done here would normally
   be done by loop.c, which has more heuristics for when to move invariants
   out of loops.  At some point we might need to move some of those
   heuristics into gcse.c.  */
#define FOLLOW_BACK_EDGES 1
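
/* For illustration (a hypothetical sketch, not taken from any particular
   test case): in

	for (i = 0; i < n; i++)
	  x[i] = a + b;

   the computation of a+b inside the loop is partially redundant along the
   back edge, since its value is available from the previous iteration.
   Following back edges lets the PRE dataflow see this and hoist the
   computation to the loop preheader.  */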

/* We support two GCSE implementations: Classic GCSE (i.e. Dragon Book)
   and PRE (Partial Redundancy Elimination) GCSE (based on Fred Chow's
   thesis).  The default is PRE.

   In either case we perform the following steps:

   1) Compute basic block information.

   2) Compute table of places where registers are set.

   3) Perform copy/constant propagation.

   4) Perform global cse.

   5) Perform another pass of copy/constant propagation [only if PRE].

   Two passes of copy/constant propagation are done because the first one
   enables more GCSE and the second one helps to clean up the copies that
   GCSE creates.  This is needed more for PRE than for Classic because
   Classic GCSE will try to use an existing register containing the common
   subexpression rather than create a new one.  This is harder to do for
   PRE because of the code motion (which Classic GCSE doesn't do).

   Expressions we are interested in GCSE-ing are of the form
   (set (pseudo-reg) (expression)).
   Function want_to_gcse_p says what these are.

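   For example (an illustrative sketch, not from any particular target):

	(set (reg:SI 104) (plus:SI (reg:SI 102) (reg:SI 103)))

   is a candidate, whereas a simple copy such as

	(set (reg:SI 104) (reg:SI 102))

   is not (want_to_gcse_p rejects it; copies are the business of the copy
   propagation pass instead).
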
   PRE handles moving invariant expressions out of loops (by treating them as
   partially redundant).  This feature of PRE is disabled when
   FOLLOW_BACK_EDGES is 0 (dataflow information is then not propagated along
   back edges), since loop.c has more involved (and thus typically better)
   heuristics for what to move out of loops.

   Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
   assignment) based GVN (global value numbering).  L.T. Simpson's paper
   (Rice University) on value numbering is a useful reference for this.

   **********************

   We used to support multiple passes but there are diminishing returns in
   doing so.  The first pass usually makes 90% of the changes that are
   doable.  A second pass can make a few more changes made possible by the
   first pass.  Experiments show any further passes don't make enough changes
   to justify the expense.

   A study of spec92 using an unlimited number of passes:
   [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
   [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
   [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1

   It was found that doing copy propagation between each pass enables further
   substitutions.

   PRE is quite expensive in complicated functions because the DFA can take
   a while to converge.  Hence we only perform one pass.  The macro
   MAX_PASSES can be modified if one wants to experiment.

   **********************

   The steps for PRE are:

   1) Build the hash table of expressions we wish to GCSE (expr_hash_table).

   2) Perform the data flow analysis for PRE.

   3) Delete the redundant instructions.

   4) Insert the required copies [if any] that make the partially
      redundant instructions fully redundant.

   5) For other reaching expressions, insert an instruction to copy the value
      to a newly created pseudo that will reach the redundant instruction.

   The deletion is done first so that when we do insertions we
   know which pseudo reg to use.  An example of the transformation is
   sketched below.

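   As an illustration (a hypothetical flow graph, not from any test case):

	B1: if (cond) goto B3;
	B2: x = a + b; goto B4;
	B3: ;
	B4: y = a + b;

   The computation of a+b in B4 is partially redundant: it is available on
   the path through B2 but not on the path through B3.  Inserting t = a + b
   in B3 (and copying x's value into t in B2) makes it fully redundant, so
   the computation in B4 can be replaced by y = t.
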
   Various papers have argued that PRE DFA is expensive (O(n^2)) and others
   argue it is not.  The number of iterations for the algorithm to converge
   is typically 2-4, so I don't view it as that expensive (relatively
   speaking).

   PRE GCSE depends heavily on the second CSE pass to clean up the copies
   we create.  To make an expression reach the place where it's redundant,
   the result of the expression is copied to a new register, and the
   redundant expression is deleted by replacing it with this new register.
   Classic GCSE doesn't have this problem as much, since it computes the
   reaching defs of each register in each block and thus can try to use an
   existing register.
   **********************

   When -fclassic-gcse, we perform a classic global CSE pass.
   It is based on the algorithms in the Dragon book and on code written
   by Devor Tevi at Intel.

   The steps for Classic GCSE are:

   1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
      Also recorded are reaching definition "gen" statements (rd_gen).

   2) Compute the reaching definitions (reaching_defs).
      This is a bitmap for each basic block indexed by INSN_CUID that is 1
      for each statement containing a definition that reaches the block.

   3) Compute the available expressions (ae_in).
      This is a bitmap for each basic block indexed by expression number
      that is 1 for each expression that is available at the beginning of
      the block.

   4) Perform GCSE.
      This is done by scanning each instruction looking for sets of the form
      (set (pseudo-reg) (expression)) and checking if `expression' is in the
      hash table.  If it is, and if the expression is available, and if only
      one computation of the expression reaches the instruction, we replace
      the expression with a register containing its value.  If there is no
      such register, but the expression is expensive enough, we create an
      instruction to copy the result of the expression into a new register
      and use that.

   **********************

   A fair bit of simplicity is gained by creating small functions for simple
   tasks, even when the function is only called in one place.  This may
   measurably slow things down [or may not] by creating more function call
   overhead than is necessary.  The source is laid out so that it's trivial
   to make the affected functions inline, so that one can measure what
   speedup, if any, can be achieved; maybe later, when things settle down,
   things can be rearranged.

   Help stamp out big monolithic functions!  */
\f
/* GCSE global vars.  */

/* -dG dump file.  */
static FILE *gcse_file;

/* Bitmaps are normally not included in debugging dumps.
   However it's useful to be able to print them from GDB.
   We could create special functions for this, but it's simpler to
   just allow passing stderr to the dump_foo fns.  Since stderr can
   be a macro, we store a copy here.  */
static FILE *debug_stderr;

/* An obstack for our working variables.  */
static struct obstack gcse_obstack;

/* Non-zero for each mode that supports (set (reg) (reg)).
   This is trivially true for integer and floating point values.
   It may or may not be true for condition codes.  */
static char can_copy_p[(int) NUM_MACHINE_MODES];

/* Non-zero if can_copy_p has been initialized.  */
static int can_copy_init_p;

/* Element I is a list of basic block I's predecessors/successors.  */
static int_list_ptr *s_preds;
static int_list_ptr *s_succs;

/* Element I is the number of predecessors/successors of basic block I.  */
static int *num_preds;
static int *num_succs;

/* Hash table of expressions.  */

struct expr
{
  /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
  rtx expr;
  /* Index in the available expression bitmaps.  */
  int bitmap_index;
  /* Next entry with the same hash.  */
  struct expr *next_same_hash;
  /* List of anticipatable occurrences in basic blocks in the function.
     An "anticipatable occurrence" is one that is the first occurrence in the
     basic block and the operands are not modified in the basic block prior
     to the occurrence.  */
  struct occr *antic_occr;
  /* List of available occurrences in basic blocks in the function.
     An "available occurrence" is one that is the last occurrence in the
     basic block and the operands are not modified by following statements in
     the basic block [including this insn].  */
  struct occr *avail_occr;
  /* Non-null if the computation is PRE redundant.
     The value is the newly created pseudo-reg to record a copy of the
     expression in all the places that reach the redundant copy.  */
  rtx reaching_reg;
};

/* Occurrence of an expression.
   There is one per basic block.  If a pattern appears more than once, the
   last appearance is used [or the first, for anticipatable expressions].  */

struct occr
{
  /* Next occurrence of this expression.  */
  struct occr *next;
  /* The insn that computes the expression.  */
  rtx insn;
  /* Non-zero if this [anticipatable] occurrence has been deleted.  */
  char deleted_p;
  /* Non-zero if this [available] occurrence has been copied to
     reaching_reg.  */
  /* ??? This is mutually exclusive with deleted_p, so they could share
     the same byte.  */
  char copied_p;
};
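/* To illustrate the two lists (a hypothetical sketch): if `a + b' is
   computed twice in block 2 (insns I1 then I2, operands unmodified around
   them) and once in block 5 (insn I3), its struct expr would carry

	antic_occr:  I1 -> I3    (first occurrence in each block)
	avail_occr:  I2 -> I3    (last occurrence in each block)

   with each list linked through occr->next, one node per basic block.  */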
/* Expression and copy propagation hash tables.
   Each hash table is an array of buckets.
   ??? It is known that if it were an array of entries, structure elements
   `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
   not clear whether in the final analysis a sufficient amount of memory would
   be saved as the size of the available expression bitmaps would be larger
   [one could build a mapping table without holes afterwards though].
   Someday I'll perform the computation and figure it out.  */

/* Total size of the expression hash table, in elements.  */
static int expr_hash_table_size;
/* The table itself.
   This is an array of `expr_hash_table_size' elements.  */
static struct expr **expr_hash_table;

/* Total size of the copy propagation hash table, in elements.  */
static int set_hash_table_size;
/* The table itself.
   This is an array of `set_hash_table_size' elements.  */
static struct expr **set_hash_table;

/* Mapping of uids to cuids.
   Only real insns get cuids.  */
static int *uid_cuid;

/* Highest UID in UID_CUID.  */
static int max_uid;

/* Get the cuid of an insn.  */
#define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])

/* Number of cuids.  */
static int max_cuid;

/* Mapping of cuids to insns.  */
static rtx *cuid_insn;

/* Get insn from cuid.  */
#define CUID_INSN(CUID) (cuid_insn[CUID])
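/* For example (illustrative numbers only): if a function's real insns have
   UIDs 3, 7 and 9 with notes interspersed, their CUIDs are 0, 1 and 2;
   INSN_CUID on the insn with UID 7 yields 1, and CUID_INSN (1) yields that
   insn back.  Unlike UIDs, CUIDs are dense and monotonically increasing,
   which is what lets them index bitmap columns directly.  */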
/* Maximum register number in function prior to doing gcse + 1.
   Registers created during this pass have regno >= max_gcse_regno.
   This is named with "gcse" to not collide with global of same name.  */
static int max_gcse_regno;

/* Maximum number of cse-able expressions found.  */
static int n_exprs;
/* Maximum number of assignments for copy propagation found.  */
static int n_sets;

/* Table of registers that are modified.
   For each register, each element is a list of places where the pseudo-reg
   is set.

   For simplicity, GCSE is done on sets of pseudo-regs only.  PRE GCSE only
   requires knowledge of which blocks kill which regs [and thus could use
   a bitmap instead of the lists `reg_set_table' uses].  Classic GCSE
   uses the information in the lists.

   If the classic GCSE pass is deleted, `reg_set_table' could be turned
   into an array of bitmaps (num-bbs x num-regs)
   [however perhaps it may be useful to keep the data as is].
   One advantage of recording things this way is that `reg_set_table' is
   fairly sparse with respect to pseudo regs but for hard regs could be
   fairly dense [relatively speaking].
   Also, recording sets of pseudo-regs in lists speeds
   up functions like compute_transp, since in the case of pseudo-regs we only
   need to iterate over the number of times a pseudo-reg is set, not over the
   number of basic blocks [clearly there is a bit of a slow down in the cases
   where a pseudo is set more than once in a block, however it is believed
   that the net effect is to speed things up].  This isn't done for hard-regs
   because recording call-clobbered hard-regs in `reg_set_table' at each
   function call can consume a fair bit of memory, and iterating over
   hard-regs stored this way in compute_transp will be more expensive.  */

typedef struct reg_set {
  /* The next setting of this register.  */
  struct reg_set *next;
  /* The insn where it was set.  */
  rtx insn;
} reg_set;
static reg_set **reg_set_table;
/* Size of `reg_set_table'.
   The table starts out at max_gcse_regno + slop, and is enlarged as
   necessary.  */
static int reg_set_table_size;
/* Amount to grow `reg_set_table' by when it's full.  */
#define REG_SET_TABLE_SLOP 100
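/* A small sketch of the layout (hypothetical insns): if pseudo 101 is set
   in insns I7 and I23, then reg_set_table[101] points to the two-element
   chain

	{ insn = I7 } -> { insn = I23 } -> NULL

   while reg_set_table[r] is NULL for any pseudo R that is never set.  */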
/* Bitmap containing one bit for each register in the program.
   Used when performing GCSE to track which registers have been set since
   the start of the basic block.  */
static sbitmap reg_set_bitmap;

/* For each block, a bitmap of registers set in the block.
   This is used by expr_killed_p and compute_transp.
   It is computed during hash table computation and not by compute_sets
   as it includes registers added since the last pass (or between cprop and
   gcse) and it's currently not easy to realloc sbitmap vectors.  */
static sbitmap *reg_set_in_block;

/* For each block, non-zero if memory is set in that block.
   This is computed during hash table computation and is used by
   expr_killed_p and compute_transp.
   ??? Handling of memory is very simple, we don't make any attempt
   to optimize things (later).
   ??? This can be computed by compute_sets since the information
   doesn't change.  */
static char *mem_set_in_block;

/* Various variables for statistics gathering.  */

/* Memory used in a pass.
   This isn't intended to be absolutely precise.  Its intent is only
   to keep an eye on memory usage.  */
static int bytes_used;
/* GCSE substitutions made.  */
static int gcse_subst_count;
/* Number of copy instructions created.  */
static int gcse_create_count;
/* Number of constants propagated.  */
static int const_prop_count;
/* Number of copies propagated.  */
static int copy_prop_count;

extern char *current_function_name;
extern int current_function_calls_setjmp;
extern int current_function_calls_longjmp;
\f
/* These variables are used by classic GCSE.
   Normally they'd be defined a bit later, but `rd_gen' needs to
   be declared sooner.  */

/* A bitmap of all ones for implementing the algorithm for available
   expressions and reaching definitions.  */
/* ??? Available expression bitmaps have a different size than reaching
   definition bitmaps.  This should be the larger of the two, however, it
   is not currently used for reaching definitions.  */
static sbitmap u_bitmap;

/* Each block has a bitmap of each type.
   The length of each block's bitmap is:

       max_cuid  - for reaching definitions
       n_exprs   - for available expressions

   Thus we view the bitmaps as 2 dimensional arrays, i.e.

       rd_kill[block_num][cuid_num]
       ae_kill[block_num][expr_num]
*/
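/* For instance (a usage sketch, names as declared below): testing whether
   expression E is available on entry to block BB is

	TEST_BIT (ae_in[bb], e->bitmap_index)

   and whether the definition in the insn with cuid C reaches block BB is

	TEST_BIT (reaching_defs[bb], c)

   using the TEST_BIT macro for sbitmaps.  */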
/* For reaching defs.  */
static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;

/* For available exprs.  */
static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
\f
static void compute_can_copy PROTO ((void));

static char *gmalloc PROTO ((unsigned int));
static char *grealloc PROTO ((char *, unsigned int));
static char *gcse_alloc PROTO ((unsigned long));
static void alloc_gcse_mem PROTO ((rtx));
static void free_gcse_mem PROTO ((void));
extern void dump_cuid_table PROTO ((FILE *));

static void alloc_reg_set_mem PROTO ((int));
static void free_reg_set_mem PROTO ((void));
static void record_one_set PROTO ((int, rtx));
static void record_set_info PROTO ((rtx, rtx));
static void compute_sets PROTO ((rtx));

static void hash_scan_insn PROTO ((rtx, int, int));
static void hash_scan_set PROTO ((rtx, rtx, int));
static void hash_scan_clobber PROTO ((rtx, rtx));
static void hash_scan_call PROTO ((rtx, rtx));
static void maybe_set_rd_gen PROTO ((int, rtx));
static int want_to_gcse_p PROTO ((rtx));
static int oprs_unchanged_p PROTO ((rtx, rtx, int));
static int oprs_anticipatable_p PROTO ((rtx, rtx));
static int oprs_available_p PROTO ((rtx, rtx));
static void insert_expr_in_table PROTO ((rtx, enum machine_mode, rtx, int, int));
static void insert_set_in_table PROTO ((rtx, rtx));
static unsigned int hash_expr PROTO ((rtx, enum machine_mode, int *, int));
static unsigned int hash_expr_1 PROTO ((rtx, enum machine_mode, int *));
static unsigned int hash_set PROTO ((int, int));
static int expr_equiv_p PROTO ((rtx, rtx));
static void record_last_reg_set_info PROTO ((rtx, int));
static void record_last_mem_set_info PROTO ((rtx));
static void record_last_set_info PROTO ((rtx, rtx));
static void compute_hash_table PROTO ((rtx, int));
static void alloc_set_hash_table PROTO ((int));
static void free_set_hash_table PROTO ((void));
static void compute_set_hash_table PROTO ((rtx));
static void alloc_expr_hash_table PROTO ((int));
static void free_expr_hash_table PROTO ((void));
static void compute_expr_hash_table PROTO ((rtx));
static void dump_hash_table PROTO ((FILE *, char *, struct expr **, int, int));
static struct expr *lookup_expr PROTO ((rtx));
static struct expr *lookup_set PROTO ((int, rtx));
static struct expr *next_set PROTO ((int, struct expr *));
static void reset_opr_set_tables PROTO ((void));
static int oprs_not_set_p PROTO ((rtx, rtx));
static void mark_call PROTO ((rtx, rtx));
static void mark_set PROTO ((rtx, rtx));
static void mark_clobber PROTO ((rtx, rtx));
static void mark_oprs_set PROTO ((rtx));

static void alloc_rd_mem PROTO ((int, int));
static void free_rd_mem PROTO ((void));
static void compute_kill_rd PROTO ((void));
static void handle_rd_kill_set PROTO ((rtx, int, int));
static void compute_rd PROTO ((void));
extern void dump_rd_table PROTO ((FILE *, char *, sbitmap *));

static void alloc_avail_expr_mem PROTO ((int, int));
static void free_avail_expr_mem PROTO ((void));
static void compute_ae_gen PROTO ((void));
static void compute_ae_kill PROTO ((void));
static int expr_killed_p PROTO ((rtx, int));
static void compute_available PROTO ((void));

static int expr_reaches_here_p PROTO ((struct occr *, struct expr *,
				       int, int, char *));
static rtx computing_insn PROTO ((struct expr *, rtx));
static int def_reaches_here_p PROTO ((rtx, rtx));
static int can_disregard_other_sets PROTO ((struct reg_set **, rtx, int));
static int handle_avail_expr PROTO ((rtx, struct expr *));
static int classic_gcse PROTO ((void));
static int one_classic_gcse_pass PROTO ((rtx, int));

static void alloc_cprop_mem PROTO ((int, int));
static void free_cprop_mem PROTO ((void));
extern void dump_cprop_data PROTO ((FILE *));
static void compute_transp PROTO ((rtx, int, sbitmap *, int));
static void compute_cprop_local_properties PROTO ((void));
static void compute_cprop_avinout PROTO ((void));
static void compute_cprop_data PROTO ((void));
static void find_used_regs PROTO ((rtx));
static int try_replace_reg PROTO ((rtx, rtx, rtx));
static struct expr *find_avail_set PROTO ((int, rtx));
static int cprop_insn PROTO ((rtx));
static int cprop PROTO ((void));
static int one_cprop_pass PROTO ((rtx, int));

static void alloc_pre_mem PROTO ((int, int));
static void free_pre_mem PROTO ((void));
extern void dump_pre_data PROTO ((FILE *));
static void compute_pre_local_properties PROTO ((void));
static void compute_pre_avinout PROTO ((void));
static void compute_pre_antinout PROTO ((void));
static void compute_pre_pavinout PROTO ((void));
static void compute_pre_ppinout PROTO ((void));
static void compute_pre_data PROTO ((void));
static int pre_expr_reaches_here_p PROTO ((struct occr *, struct expr *,
					   int, char *));
static void pre_insert_insn PROTO ((struct expr *, int));
static void pre_insert PROTO ((struct expr **));
static void pre_insert_copy_insn PROTO ((struct expr *, rtx));
static void pre_insert_copies PROTO ((void));
static int pre_delete PROTO ((void));
static int pre_gcse PROTO ((void));
static int one_pre_gcse_pass PROTO ((rtx, int));

static void add_label_notes PROTO ((rtx, rtx));
\f
/* Entry point for global common subexpression elimination.
   F is the first instruction in the function.  */

void
gcse_main (f, file)
     rtx f;
     FILE *file;
{
  int changed, pass;
  /* Bytes used at start of pass.  */
  int initial_bytes_used;
  /* Maximum number of bytes used by a pass.  */
  int max_pass_bytes;
  /* Point to release obstack data from for each pass.  */
  char *gcse_obstack_bottom;

  /* It's impossible to construct a correct control flow graph in the
     presence of setjmp, so just punt to be safe.  */
  if (current_function_calls_setjmp)
    return;

  /* For calling dump_foo fns from gdb.  */
  debug_stderr = stderr;

  max_gcse_regno = max_reg_num ();
  find_basic_blocks (f, max_gcse_regno, file, 0);

  /* Return if there's nothing to do.  */
  if (n_basic_blocks <= 1)
    {
      /* Free storage allocated by find_basic_blocks.  */
      free_basic_block_vars (0);
      return;
    }

  /* See what modes support reg/reg copy operations.  */
  if (! can_copy_init_p)
    {
      compute_can_copy ();
      can_copy_init_p = 1;
    }

  gcc_obstack_init (&gcse_obstack);

  gcse_file = file;

  /* Allocate and compute predecessors/successors.  */

  s_preds = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
  s_succs = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
  num_preds = (int *) alloca (n_basic_blocks * sizeof (int));
  num_succs = (int *) alloca (n_basic_blocks * sizeof (int));
  bytes_used = 4 * n_basic_blocks * sizeof (int_list_ptr);
  compute_preds_succs (s_preds, s_succs, num_preds, num_succs);

  if (file)
    dump_bb_data (file, s_preds, s_succs);

  /* Record where pseudo-registers are set.
     This data is kept accurate during each pass.
     ??? We could also record hard-reg and memory information here
     [since it's unchanging], however it is currently done during
     hash table computation.  */

  alloc_reg_set_mem (max_gcse_regno);
  compute_sets (f);

  pass = 0;
  initial_bytes_used = bytes_used;
  max_pass_bytes = 0;
  gcse_obstack_bottom = gcse_alloc (1);
  changed = 1;
  while (changed && pass < MAX_PASSES)
    {
      changed = 0;
      if (file)
        fprintf (file, "GCSE pass %d\n\n", pass + 1);

      /* Initialize bytes_used to the space for the pred/succ lists,
         and the reg_set_table data.  */
      bytes_used = initial_bytes_used;

      /* Each pass may create new registers, so recalculate each time.  */
      max_gcse_regno = max_reg_num ();

      alloc_gcse_mem (f);

      changed = one_cprop_pass (f, pass + 1);

      if (optimize_size)
        changed |= one_classic_gcse_pass (f, pass + 1);
      else
        changed |= one_pre_gcse_pass (f, pass + 1);

      if (max_pass_bytes < bytes_used)
        max_pass_bytes = bytes_used;

      free_gcse_mem ();

      if (file)
        {
          fprintf (file, "\n");
          fflush (file);
        }
      obstack_free (&gcse_obstack, gcse_obstack_bottom);
      pass++;
    }

  /* If we're doing PRE, do one last pass of copy propagation.  */
  if (! optimize_size)
    {
      max_gcse_regno = max_reg_num ();
      alloc_gcse_mem (f);
      one_cprop_pass (f, pass + 1);
      free_gcse_mem ();
    }

  if (file)
    {
      fprintf (file, "GCSE of %s: %d basic blocks, ",
               current_function_name, n_basic_blocks);
      fprintf (file, "%d pass%s, %d bytes\n\n",
               pass, pass > 1 ? "es" : "", max_pass_bytes);
    }

  /* Free our obstack.  */
  obstack_free (&gcse_obstack, NULL_PTR);
  /* Free reg_set_table.  */
  free_reg_set_mem ();
  /* Free storage used to record predecessor/successor data.  */
  free_bb_mem ();
  /* Free storage allocated by find_basic_blocks.  */
  free_basic_block_vars (0);
}
\f
/* Misc. utilities.  */

/* Compute which modes support reg/reg copy operations.  */

static void
compute_can_copy ()
{
  int i;
#ifndef AVOID_CCMODE_COPIES
  rtx reg, insn;
#endif
  char *free_point = (char *) oballoc (1);

  bzero (can_copy_p, NUM_MACHINE_MODES);

  start_sequence ();
  for (i = 0; i < NUM_MACHINE_MODES; i++)
    {
      switch (GET_MODE_CLASS (i))
        {
        case MODE_CC:
#ifdef AVOID_CCMODE_COPIES
          can_copy_p[i] = 0;
#else
          reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
          insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
          if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
            can_copy_p[i] = 1;
#endif
          break;
        default:
          can_copy_p[i] = 1;
          break;
        }
    }
  end_sequence ();

  /* Free the objects we just allocated.  */
  obfree (free_point);
}
\f
/* Cover function to xmalloc to record bytes allocated.  */

static char *
gmalloc (size)
     unsigned int size;
{
  bytes_used += size;
  return xmalloc (size);
}

/* Cover function to xrealloc.
   We don't record the additional size since we don't know it.
   It won't affect memory usage stats much anyway.  */

static char *
grealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  return xrealloc (ptr, size);
}

/* Cover function to obstack_alloc.
   We don't need to record the bytes allocated here since
   obstack_chunk_alloc is set to gmalloc.  */

static char *
gcse_alloc (size)
     unsigned long size;
{
  return (char *) obstack_alloc (&gcse_obstack, size);
}
/* Allocate memory for the cuid mapping array,
   and reg/memory set tracking tables.

   This is called at the start of each pass.  */

static void
alloc_gcse_mem (f)
     rtx f;
{
  int i, n;
  rtx insn;

  /* Find the largest UID and create a mapping from UIDs to CUIDs.
     CUIDs are like UIDs except they increase monotonically, have no gaps,
     and only apply to real insns.  */

  max_uid = get_max_uid ();
  n = (max_uid + 1) * sizeof (int);
  uid_cuid = (int *) gmalloc (n);
  bzero ((char *) uid_cuid, n);
  for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
    {
      if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
        INSN_CUID (insn) = i++;
      else
        INSN_CUID (insn) = i;
    }

  /* Create a table mapping cuids to insns.  */

  max_cuid = i;
  n = (max_cuid + 1) * sizeof (rtx);
  cuid_insn = (rtx *) gmalloc (n);
  bzero ((char *) cuid_insn, n);
  for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
    {
      if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
        {
          CUID_INSN (i) = insn;
          i++;
        }
    }

  /* Allocate vars to track sets of regs.  */

  reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);

  /* Allocate vars to track sets of regs, memory per block.  */

  reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
                                                       max_gcse_regno);
  mem_set_in_block = (char *) gmalloc (n_basic_blocks);
}

/* Free memory allocated by alloc_gcse_mem.  */

static void
free_gcse_mem ()
{
  free (uid_cuid);
  free (cuid_insn);

  free (reg_set_bitmap);

  free (reg_set_in_block);
  free (mem_set_in_block);
}
void
dump_cuid_table (file)
     FILE *file;
{
  int i, n;

  fprintf (file, "CUID table\n");
  for (i = n = 0; i < max_cuid; i++)
    {
      rtx insn = CUID_INSN (i);
      if (n != 0 && n % 10 == 0)
        fprintf (file, "\n");
      if (insn != NULL)
        {
          fprintf (file, " %d", INSN_UID (insn));
          /* Count the entries printed; N was never incremented before,
             so the line breaks above never fired.  */
          n++;
        }
    }
  fprintf (file, "\n\n");
}
\f
/* Register set information.

   `reg_set_table' records where each register is set or otherwise
   modified.  */

static struct obstack reg_set_obstack;

static void
alloc_reg_set_mem (n_regs)
     int n_regs;
{
  int n;

  reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
  n = reg_set_table_size * sizeof (struct reg_set *);
  reg_set_table = (struct reg_set **) gmalloc (n);
  bzero ((char *) reg_set_table, n);

  gcc_obstack_init (&reg_set_obstack);
}

static void
free_reg_set_mem ()
{
  free (reg_set_table);
  obstack_free (&reg_set_obstack, NULL_PTR);
}

/* Record REGNO in the reg_set table.  */

static void
record_one_set (regno, insn)
     int regno;
     rtx insn;
{
  /* Allocate a new reg_set element and link it onto the list.  */
  struct reg_set *new_reg_info, *reg_info_ptr1, *reg_info_ptr2;

  /* If the table isn't big enough, enlarge it.  */
  if (regno >= reg_set_table_size)
    {
      int new_size = regno + REG_SET_TABLE_SLOP;
      reg_set_table = (struct reg_set **)
        grealloc ((char *) reg_set_table,
                  new_size * sizeof (struct reg_set *));
      bzero ((char *) (reg_set_table + reg_set_table_size),
             (new_size - reg_set_table_size) * sizeof (struct reg_set *));
      reg_set_table_size = new_size;
    }

  new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
                                                   sizeof (struct reg_set));
  bytes_used += sizeof (struct reg_set);
  new_reg_info->insn = insn;
  new_reg_info->next = NULL;
  if (reg_set_table[regno] == NULL)
    reg_set_table[regno] = new_reg_info;
  else
    {
      reg_info_ptr1 = reg_info_ptr2 = reg_set_table[regno];
      /* ??? One could keep a "last" pointer to speed this up.  */
      while (reg_info_ptr1 != NULL)
        {
          reg_info_ptr2 = reg_info_ptr1;
          reg_info_ptr1 = reg_info_ptr1->next;
        }
      reg_info_ptr2->next = new_reg_info;
    }
}
/* For communication between next two functions (via note_stores).  */
static rtx record_set_insn;

/* Called from compute_sets via note_stores to handle one
   SET or CLOBBER in an insn.  */

static void
record_set_info (dest, setter)
     rtx dest, setter ATTRIBUTE_UNUSED;
{
  if (GET_CODE (dest) == SUBREG)
    dest = SUBREG_REG (dest);

  if (GET_CODE (dest) == REG)
    {
      if (REGNO (dest) >= FIRST_PSEUDO_REGISTER)
        record_one_set (REGNO (dest), record_set_insn);
    }
}

/* Scan the function and record each set of each pseudo-register.

   This is called once, at the start of the gcse pass.
   See the comments for `reg_set_table' for further docs.  */

static void
compute_sets (f)
     rtx f;
{
  rtx insn = f;

  while (insn)
    {
      if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
        {
          record_set_insn = insn;
          note_stores (PATTERN (insn), record_set_info);
        }
      insn = NEXT_INSN (insn);
    }
}
\f
/* Hash table support.  */

#define NEVER_SET -1

/* For each register, the cuid of the first/last insn in the block to set it,
   or NEVER_SET if the register is not set in the block.  */
static int *reg_first_set;
static int *reg_last_set;

/* While computing "first/last set" info, this is the CUID of the first/last
   insn to set memory, or NEVER_SET if memory is not set.  `mem_last_set' is
   also used when performing GCSE to record whether memory has been set since
   the beginning of the block.
   Note that handling of memory is very simple, we don't make any attempt
   to optimize things (later).  */
static int mem_first_set;
static int mem_last_set;

/* Set the appropriate bit in `rd_gen' [the gen for reaching defs] if the
   register set in this insn is not set after this insn in this block.  */

static void
maybe_set_rd_gen (regno, insn)
     int regno;
     rtx insn;
{
  if (reg_last_set[regno] <= INSN_CUID (insn))
    SET_BIT (rd_gen[BLOCK_NUM (insn)], INSN_CUID (insn));
}
/* Perform a quick check whether X, the source of a set, is something
   we want to consider for GCSE.  */

static int
want_to_gcse_p (x)
     rtx x;
{
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case REG:
    case SUBREG:
    case CONST_INT:
    case CONST_DOUBLE:
    case CALL:
      return 0;

    default:
      break;
    }

  return 1;
}
/* Return non-zero if the operands of expression X are unchanged from the
   start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
   or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */

static int
oprs_unchanged_p (x, insn, avail_p)
     rtx x, insn;
     int avail_p;
{
  int i;
  enum rtx_code code;
  char *fmt;

  /* repeat is used to turn tail-recursion into iteration.  */
 repeat:

  if (x == 0)
    return 1;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      if (avail_p)
        return (reg_last_set[REGNO (x)] == NEVER_SET
                || reg_last_set[REGNO (x)] < INSN_CUID (insn));
      else
        return (reg_first_set[REGNO (x)] == NEVER_SET
                || reg_first_set[REGNO (x)] >= INSN_CUID (insn));

    case MEM:
      if (avail_p)
        {
          if (mem_last_set != NEVER_SET
              && mem_last_set >= INSN_CUID (insn))
            return 0;
        }
      else
        {
          if (mem_first_set != NEVER_SET
              && mem_first_set < INSN_CUID (insn))
            return 0;
        }
      x = XEXP (x, 0);
      goto repeat;

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
      return 0;

    case PC:
    case CC0: /*FIXME*/
    case CONST:
    case CONST_INT:
    case CONST_DOUBLE:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return 1;

    default:
      break;
    }

  i = GET_RTX_LENGTH (code) - 1;
  fmt = GET_RTX_FORMAT (code);
  for (; i >= 0; i--)
    {
      if (fmt[i] == 'e')
        {
          rtx tem = XEXP (x, i);

          /* If we are about to do the last recursive call
             needed at this level, change it into iteration.
             This function is called enough to be worth it.  */
          if (i == 0)
            {
              x = tem;
              goto repeat;
            }
          if (! oprs_unchanged_p (tem, insn, avail_p))
            return 0;
        }
      else if (fmt[i] == 'E')
        {
          int j;
          for (j = 0; j < XVECLEN (x, i); j++)
            {
              if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
                return 0;
            }
        }
    }

  return 1;
}
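/* An illustration of the two directions (hypothetical insns within one
   block):

	I1:  r5 = r3 + r4
	I2:  r3 = ...

   At I1 the expression r3+r4 is anticipatable (its operands are unchanged
   from the block start up to I1) but not available (r3 is modified by a
   later insn in the block), so oprs_unchanged_p returns 1 with AVAIL_P == 0
   and 0 with AVAIL_P != 0.  */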
/* Return non-zero if the operands of expression X are unchanged from
   the start of INSN's basic block up to but not including INSN.  */

static int
oprs_anticipatable_p (x, insn)
     rtx x, insn;
{
  return oprs_unchanged_p (x, insn, 0);
}

/* Return non-zero if the operands of expression X are unchanged from
   INSN to the end of INSN's basic block.  */

static int
oprs_available_p (x, insn)
     rtx x, insn;
{
  return oprs_unchanged_p (x, insn, 1);
}
/* Hash expression X.
   MODE is only used if X is a CONST_INT.
   A boolean indicating if a volatile operand is found or if the expression
   contains something we don't want to insert in the table is stored in
   DO_NOT_RECORD_P.

   ??? One might want to merge this with canon_hash.  Later.  */

static unsigned int
hash_expr (x, mode, do_not_record_p, hash_table_size)
     rtx x;
     enum machine_mode mode;
     int *do_not_record_p;
     int hash_table_size;
{
  unsigned int hash;

  *do_not_record_p = 0;

  hash = hash_expr_1 (x, mode, do_not_record_p);
  return hash % hash_table_size;
}
/* Subroutine of hash_expr to do the actual work.  */

static unsigned int
hash_expr_1 (x, mode, do_not_record_p)
     rtx x;
     enum machine_mode mode;
     int *do_not_record_p;
{
  int i, j;
  unsigned hash = 0;
  enum rtx_code code;
  char *fmt;

  /* repeat is used to turn tail-recursion into iteration.  */
 repeat:

  if (x == 0)
    return hash;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      {
        register int regno = REGNO (x);
        hash += ((unsigned) REG << 7) + regno;
        return hash;
      }

    case CONST_INT:
      {
        unsigned HOST_WIDE_INT tem = INTVAL (x);
        hash += ((unsigned) CONST_INT << 7) + (unsigned) mode + tem;
        return hash;
      }

    case CONST_DOUBLE:
      /* This is like the general case, except that it only counts
         the integers representing the constant.  */
      hash += (unsigned) code + (unsigned) GET_MODE (x);
      if (GET_MODE (x) != VOIDmode)
        for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
          {
            unsigned tem = XINT (x, i);
            hash += tem;
          }
      else
        hash += ((unsigned) CONST_DOUBLE_LOW (x)
                 + (unsigned) CONST_DOUBLE_HIGH (x));
      return hash;

      /* Assume there is only one rtx object for any given label.  */
    case LABEL_REF:
      /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
         differences and differences between each stage's debugging dumps.  */
      hash += ((unsigned) LABEL_REF << 7) + CODE_LABEL_NUMBER (XEXP (x, 0));
      return hash;

    case SYMBOL_REF:
      {
        /* Don't hash on the symbol's address to avoid bootstrap differences.
           Different hash values may cause expressions to be recorded in
           different orders and thus different registers to be used in the
           final assembler.  This also avoids differences in the dump files
           between various stages.  */
        unsigned int h = 0;
        unsigned char *p = (unsigned char *) XSTR (x, 0);
        while (*p)
          h += (h << 7) + *p++; /* ??? revisit */
        hash += ((unsigned) SYMBOL_REF << 7) + h;
        return hash;
      }

    case MEM:
      if (MEM_VOLATILE_P (x))
        {
          *do_not_record_p = 1;
          return 0;
        }
      hash += (unsigned) MEM;
      x = XEXP (x, 0);
      goto repeat;

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
    case PC:
    case CC0:
    case CALL:
    case UNSPEC_VOLATILE:
      *do_not_record_p = 1;
      return 0;

    case ASM_OPERANDS:
      if (MEM_VOLATILE_P (x))
        {
          *do_not_record_p = 1;
          return 0;
        }
      /* A non-volatile asm falls through and is hashed like any
         other rtx.  */

    default:
      break;
    }

  i = GET_RTX_LENGTH (code) - 1;
  hash += (unsigned) code + (unsigned) GET_MODE (x);
  fmt = GET_RTX_FORMAT (code);
  for (; i >= 0; i--)
    {
      if (fmt[i] == 'e')
        {
          rtx tem = XEXP (x, i);

          /* If we are about to do the last recursive call
             needed at this level, change it into iteration.
             This function is called enough to be worth it.  */
          if (i == 0)
            {
              x = tem;
              goto repeat;
            }
          hash += hash_expr_1 (tem, 0, do_not_record_p);
          if (*do_not_record_p)
            return 0;
        }
      else if (fmt[i] == 'E')
        for (j = 0; j < XVECLEN (x, i); j++)
          {
            hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
            if (*do_not_record_p)
              return 0;
          }
      else if (fmt[i] == 's')
        {
          register unsigned char *p = (unsigned char *) XSTR (x, i);
          if (p)
            while (*p)
              hash += *p++;
        }
      else if (fmt[i] == 'i')
        {
          register unsigned tem = XINT (x, i);
          hash += tem;
        }
      else
        abort ();
    }

  return hash;
}
/* Hash a set of register REGNO.

   Sets are hashed on the register that is set.
   This simplifies the PRE copy propagation code.

   ??? May need to make things more elaborate.  Later, as necessary.  */

static unsigned int
hash_set (regno, hash_table_size)
     int regno;
     int hash_table_size;
{
  unsigned int hash;

  hash = regno;
  return hash % hash_table_size;
}
/* Return non-zero if exp1 is equivalent to exp2.
   ??? Borrowed from cse.c.  Might want to remerge with cse.c.  Later.  */

static int
expr_equiv_p (x, y)
     rtx x, y;
{
  register int i, j;
  register enum rtx_code code;
  register char *fmt;

  if (x == y)
    return 1;
  if (x == 0 || y == 0)
    return x == y;

  code = GET_CODE (x);
  if (code != GET_CODE (y))
    return 0;

  /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.  */
  if (GET_MODE (x) != GET_MODE (y))
    return 0;

  switch (code)
    {
    case PC:
    case CC0:
      return x == y;

    case CONST_INT:
      return INTVAL (x) == INTVAL (y);

    case LABEL_REF:
      return XEXP (x, 0) == XEXP (y, 0);

    case SYMBOL_REF:
      return XSTR (x, 0) == XSTR (y, 0);

    case REG:
      return REGNO (x) == REGNO (y);

      /* For commutative operations, check both orders.  */
    case PLUS:
    case MULT:
    case AND:
    case IOR:
    case XOR:
    case NE:
    case EQ:
      return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
               && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
              || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
                  && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));

    default:
      break;
    }

  /* Compare the elements.  If any pair of corresponding elements
     fail to match, return 0 for the whole thing.  */

  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      switch (fmt[i])
        {
        case 'e':
          if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
            return 0;
          break;

        case 'E':
          if (XVECLEN (x, i) != XVECLEN (y, i))
            return 0;
          for (j = 0; j < XVECLEN (x, i); j++)
            if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
              return 0;
          break;

        case 's':
          if (strcmp (XSTR (x, i), XSTR (y, i)))
            return 0;
          break;

        case 'i':
          if (XINT (x, i) != XINT (y, i))
            return 0;
          break;

        case 'w':
          if (XWINT (x, i) != XWINT (y, i))
            return 0;
          break;

        case '0':
          break;

        default:
          abort ();
        }
    }

  return 1;
}
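/* For example (illustrative rtxes): expr_equiv_p considers

	(plus:SI (reg:SI 100) (reg:SI 101))
   and
	(plus:SI (reg:SI 101) (reg:SI 100))

   equivalent because PLUS is in the commutative list above, whereas
   (minus:SI (reg:SI 100) (reg:SI 101)) and its swapped form are not.  */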
/* Insert expression X in INSN in the hash table.
   If it is already present, record it as the last occurrence in INSN's
   basic block.

   MODE is the mode of the value X is being stored into.
   It is only used if X is a CONST_INT.

   ANTIC_P is non-zero if X is an anticipatable expression.
   AVAIL_P is non-zero if X is an available expression.  */

static void
insert_expr_in_table (x, mode, insn, antic_p, avail_p)
     rtx x;
     enum machine_mode mode;
     rtx insn;
     int antic_p, avail_p;
{
  int found, do_not_record_p;
  unsigned int hash;
  struct expr *cur_expr, *last_expr = NULL;
  struct occr *antic_occr, *avail_occr;
  struct occr *last_occr = NULL;

  hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);

  /* Do not insert expression in table if it contains volatile operands,
     or if hash_expr determines the expression is something we don't want
     to or can't handle.  */
  if (do_not_record_p)
    return;

  cur_expr = expr_hash_table[hash];
  found = 0;

  while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
    {
      /* If the expression isn't found, save a pointer to the end of
         the list.  */
      last_expr = cur_expr;
      cur_expr = cur_expr->next_same_hash;
    }

  if (! found)
    {
      cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
      bytes_used += sizeof (struct expr);
      if (expr_hash_table[hash] == NULL)
        {
          /* This is the first pattern that hashed to this index.  */
          expr_hash_table[hash] = cur_expr;
        }
      else
        {
          /* Add EXPR to end of this hash chain.  */
          last_expr->next_same_hash = cur_expr;
        }
      /* Set the fields of the expr element.  */
      cur_expr->expr = x;
      cur_expr->bitmap_index = n_exprs++;
      cur_expr->next_same_hash = NULL;
      cur_expr->antic_occr = NULL;
      cur_expr->avail_occr = NULL;
    }

  /* Now record the occurrence(s).  */

  if (antic_p)
    {
      antic_occr = cur_expr->antic_occr;

      /* Search for another occurrence in the same basic block.  */
      while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
        {
          /* If an occurrence isn't found, save a pointer to the end of
             the list.  */
          last_occr = antic_occr;
          antic_occr = antic_occr->next;
        }

      if (antic_occr)
        {
          /* Found another instance of the expression in the same basic block.
             Prefer the currently recorded one.  We want the first one in the
             block and the block is scanned from start to end.  */
          ; /* nothing to do */
        }
      else
        {
          /* First occurrence of this expression in this basic block.  */
          antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
          bytes_used += sizeof (struct occr);
          /* First occurrence of this expression in any block?  */
          if (cur_expr->antic_occr == NULL)
            cur_expr->antic_occr = antic_occr;
          else
            last_occr->next = antic_occr;
          antic_occr->insn = insn;
          antic_occr->next = NULL;
        }
    }

  if (avail_p)
    {
      avail_occr = cur_expr->avail_occr;

      /* Search for another occurrence in the same basic block.  */
      while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
        {
          /* If an occurrence isn't found, save a pointer to the end of
             the list.  */
          last_occr = avail_occr;
          avail_occr = avail_occr->next;
        }

      if (avail_occr)
        {
          /* Found another instance of the expression in the same basic block.
             Prefer this occurrence to the currently recorded one.  We want
             the last one in the block and the block is scanned from start
             to end.  */
          avail_occr->insn = insn;
        }
      else
        {
          /* First occurrence of this expression in this basic block.  */
          avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
          bytes_used += sizeof (struct occr);
          /* First occurrence of this expression in any block?  */
          if (cur_expr->avail_occr == NULL)
            cur_expr->avail_occr = avail_occr;
          else
            last_occr->next = avail_occr;
          avail_occr->insn = insn;
          avail_occr->next = NULL;
        }
    }
}
/* Insert pattern X in INSN in the hash table.
   X is a SET of a reg to either another reg or a constant.
   If it is already present, record it as the last occurrence in INSN's
   basic block.  */

static void
insert_set_in_table (x, insn)
     rtx x;
     rtx insn;
{
  int found;
  unsigned int hash;
  struct expr *cur_expr, *last_expr = NULL;
  struct occr *cur_occr, *last_occr = NULL;

  if (GET_CODE (x) != SET
      || GET_CODE (SET_DEST (x)) != REG)
    abort ();

  hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);

  cur_expr = set_hash_table[hash];
  found = 0;

  while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
    {
      /* If the expression isn't found, save a pointer to the end of
         the list.  */
      last_expr = cur_expr;
      cur_expr = cur_expr->next_same_hash;
    }

  if (! found)
    {
      cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
      bytes_used += sizeof (struct expr);
      if (set_hash_table[hash] == NULL)
        {
          /* This is the first pattern that hashed to this index.  */
          set_hash_table[hash] = cur_expr;
        }
      else
        {
          /* Add EXPR to end of this hash chain.  */
          last_expr->next_same_hash = cur_expr;
        }
      /* Set the fields of the expr element.
         We must copy X because it can be modified when copy propagation is
         performed on its operands.  */
      /* ??? Should this go in a different obstack?  */
      cur_expr->expr = copy_rtx (x);
      cur_expr->bitmap_index = n_sets++;
      cur_expr->next_same_hash = NULL;
      cur_expr->antic_occr = NULL;
      cur_expr->avail_occr = NULL;
    }

  /* Now record the occurrence.  */

  cur_occr = cur_expr->avail_occr;

  /* Search for another occurrence in the same basic block.  */
  while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
    {
      /* If an occurrence isn't found, save a pointer to the end of
         the list.  */
      last_occr = cur_occr;
      cur_occr = cur_occr->next;
    }

  if (cur_occr)
    {
      /* Found another instance of the expression in the same basic block.
         Prefer this occurrence to the currently recorded one.  We want
         the last one in the block and the block is scanned from start
         to end.  */
      cur_occr->insn = insn;
    }
  else
    {
      /* First occurrence of this expression in this basic block.  */
      cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
      bytes_used += sizeof (struct occr);
      /* First occurrence of this expression in any block?  */
      if (cur_expr->avail_occr == NULL)
        cur_expr->avail_occr = cur_occr;
      else
        last_occr->next = cur_occr;
      cur_occr->insn = insn;
      cur_occr->next = NULL;
    }
}
1760
1761 /* Scan pattern PAT of INSN and add an entry to the hash table.
1762 If SET_P is non-zero, this is for the assignment hash table,
1763 otherwise it is for the expression hash table. */
1764
1765 static void
1766 hash_scan_set (pat, insn, set_p)
1767 rtx pat, insn;
1768 int set_p;
1769 {
1770 rtx src = SET_SRC (pat);
1771 rtx dest = SET_DEST (pat);
1772
1773 if (GET_CODE (src) == CALL)
1774 hash_scan_call (src, insn);
1775
1776 if (GET_CODE (dest) == REG)
1777 {
1778 int regno = REGNO (dest);
1779 rtx tmp;
1780
1781 /* Only record sets of pseudo-regs in the hash table. */
1782 if (! set_p
1783 && regno >= FIRST_PSEUDO_REGISTER
1784 /* Don't GCSE something if we can't do a reg/reg copy. */
1785 && can_copy_p [GET_MODE (dest)]
1786 /* Is SET_SRC something we want to gcse? */
1787 && want_to_gcse_p (src))
1788 {
1789 /* An expression is not anticipatable if its operands are
1790 modified before this insn. */
1791 int antic_p = ! optimize_size && oprs_anticipatable_p (src, insn);
1792 /* An expression is not available if its operands are
1793 subsequently modified, including this insn. */
1794 int avail_p = oprs_available_p (src, insn);
1795 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1796 }
1797 /* Record sets for constant/copy propagation. */
1798 else if (set_p
1799 && regno >= FIRST_PSEUDO_REGISTER
1800 && ((GET_CODE (src) == REG
1801 && REGNO (src) >= FIRST_PSEUDO_REGISTER
1802 && can_copy_p [GET_MODE (dest)])
1803 /* ??? CONST_INT:wip */
1804 || GET_CODE (src) == CONST_INT)
1805 /* A copy is not available if its src or dest is subsequently
1806 modified. Here we want to search from INSN+1 on, but
1807 oprs_available_p searches from INSN on. */
1808 && (insn == BLOCK_END (BLOCK_NUM (insn))
1809 || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1810 && oprs_available_p (pat, tmp))))
1811 insert_set_in_table (pat, insn);
1812 }
1813
1814 /* Check if first/last set in this block for classic gcse,
1815 but not for copy/constant propagation. */
1816 if (optimize_size && !set_p)
1818 {
1819 rtx dest = SET_DEST (pat);
1820
1821 while (GET_CODE (dest) == SUBREG
1822 || GET_CODE (dest) == ZERO_EXTRACT
1823 || GET_CODE (dest) == SIGN_EXTRACT
1824 || GET_CODE (dest) == STRICT_LOW_PART)
1825 dest = XEXP (dest, 0);
1826 if (GET_CODE (dest) == REG)
1827 maybe_set_rd_gen (REGNO (dest), insn);
1828 }
1829 }
1830
1831 static void
1832 hash_scan_clobber (x, insn)
1833 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1834 {
1835 /* Currently nothing to do. */
1836 }
1837
1838 static void
1839 hash_scan_call (x, insn)
1840 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1841 {
1842 /* Currently nothing to do. */
1843 }
1844
1845 /* Process INSN and add hash table entries as appropriate.
1846
1847 Only available expressions that set a single pseudo-reg are recorded.
1848
1849 Single sets in a PARALLEL could be handled, but it's an extra complication
1850 that isn't dealt with right now. The trick is handling the CLOBBERs that
1851 are also in the PARALLEL. Later.
1852
1853 If SET_P is non-zero, this is for the assignment hash table,
1854 otherwise it is for the expression hash table.
1855 If IN_LIBCALL_BLOCK is nonzero, we are in a libcall block, and should
1856 not record any expressions. */
1857
1858 static void
1859 hash_scan_insn (insn, set_p, in_libcall_block)
1860 rtx insn;
1861 int set_p;
1862 int in_libcall_block;
1863 {
1864 rtx pat = PATTERN (insn);
1865
1866 /* Pick out the sets of INSN, and for other forms of instructions record
1867 what's been modified. */
1868
1869 if (GET_CODE (pat) == SET && ! in_libcall_block)
1870 hash_scan_set (pat, insn, set_p);
1871 else if (GET_CODE (pat) == PARALLEL)
1872 {
1873 int i;
1874
1875 for (i = 0; i < XVECLEN (pat, 0); i++)
1876 {
1877 rtx x = XVECEXP (pat, 0, i);
1878
1879 if (GET_CODE (x) == SET)
1880 {
1881 if (GET_CODE (SET_SRC (x)) == CALL)
1882 hash_scan_call (SET_SRC (x), insn);
1883
1884 /* Check if first/last set in this block for classic
1885 gcse, but not for constant/copy propagation. */
1886 if (optimize_size && !set_p)
1887 {
1888 rtx dest = SET_DEST (x);
1889
1890 while (GET_CODE (dest) == SUBREG
1891 || GET_CODE (dest) == ZERO_EXTRACT
1892 || GET_CODE (dest) == SIGN_EXTRACT
1893 || GET_CODE (dest) == STRICT_LOW_PART)
1894 dest = XEXP (dest, 0);
1895 if (GET_CODE (dest) == REG)
1896 maybe_set_rd_gen (REGNO (dest), insn);
1897 }
1898 }
1899 else if (GET_CODE (x) == CLOBBER)
1900 hash_scan_clobber (x, insn);
1901 else if (GET_CODE (x) == CALL)
1902 hash_scan_call (x, insn);
1903 }
1904 }
1905 else if (GET_CODE (pat) == CLOBBER)
1906 hash_scan_clobber (pat, insn);
1907 else if (GET_CODE (pat) == CALL)
1908 hash_scan_call (pat, insn);
1909 }
1910
1911 static void
1912 dump_hash_table (file, name, table, table_size, total_size)
1913 FILE *file;
1914 char *name;
1915 struct expr **table;
1916 int table_size, total_size;
1917 {
1918 int i;
1919 /* Flattened out table, so it's printed in proper order. */
1920 struct expr **flat_table = (struct expr **) alloca (total_size * sizeof (struct expr *));
1921 unsigned int *hash_val = (unsigned int *) alloca (total_size * sizeof (unsigned int));
1922
1923 bzero ((char *) flat_table, total_size * sizeof (struct expr *));
1924 for (i = 0; i < table_size; i++)
1925 {
1926 struct expr *expr;
1927
1928 for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
1929 {
1930 flat_table[expr->bitmap_index] = expr;
1931 hash_val[expr->bitmap_index] = i;
1932 }
1933 }
1934
1935 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1936 name, table_size, total_size);
1937
1938 for (i = 0; i < total_size; i++)
1939 {
1940 struct expr *expr = flat_table[i];
1941
1942 fprintf (file, "Index %d (hash value %d)\n ",
1943 expr->bitmap_index, hash_val[i]);
1944 print_rtl (file, expr->expr);
1945 fprintf (file, "\n");
1946 }
1947
1948 fprintf (file, "\n");
1949 }
1950
1951 /* Record register first/last/block set information for REGNO in INSN.
1952 reg_first_set records the first place in the block where the register
1953 is set and is used to compute "anticipatability".
1954 reg_last_set records the last place in the block where the register
1955 is set and is used to compute "availability".
1956 reg_set_in_block records whether the register is set in the block
1957 and is used to compute "transparency". */
1958
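/* A worked illustration of the recording above (hypothetical CUIDs,
   not taken from this file): if a block's insns have CUIDs 10..14 and
   pseudo 42 is set at CUIDs 11 and 13, then after scanning the block
   reg_first_set[42] == 11, reg_last_set[42] == 13, and bit 42 is set
   in reg_set_in_block for that block.  */
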
1959 static void
1960 record_last_reg_set_info (insn, regno)
1961 rtx insn;
1962 int regno;
1963 {
1964 if (reg_first_set[regno] == NEVER_SET)
1965 reg_first_set[regno] = INSN_CUID (insn);
1966 reg_last_set[regno] = INSN_CUID (insn);
1967 SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
1968 }
1969
1970 /* Record memory first/last/block set information for INSN. */
1971
1972 static void
1973 record_last_mem_set_info (insn)
1974 rtx insn;
1975 {
1976 if (mem_first_set == NEVER_SET)
1977 mem_first_set = INSN_CUID (insn);
1978 mem_last_set = INSN_CUID (insn);
1979 mem_set_in_block[BLOCK_NUM (insn)] = 1;
1980 }
1981
1982 /* Used for communicating between next two routines. */
1983 static rtx last_set_insn;
1984
1985 /* Called from compute_hash_table via note_stores to handle one
1986 SET or CLOBBER in an insn. */
1987
1988 static void
1989 record_last_set_info (dest, setter)
1990 rtx dest, setter ATTRIBUTE_UNUSED;
1991 {
1992 if (GET_CODE (dest) == SUBREG)
1993 dest = SUBREG_REG (dest);
1994
1995 if (GET_CODE (dest) == REG)
1996 record_last_reg_set_info (last_set_insn, REGNO (dest));
1997 else if (GET_CODE (dest) == MEM
1998 /* Ignore pushes, they clobber nothing. */
1999 && ! push_operand (dest, GET_MODE (dest)))
2000 record_last_mem_set_info (last_set_insn);
2001 }
2002
2003 /* Top level function to create an expression or assignment hash table.
2004
2005 Expression entries are placed in the hash table if
2006 - they are of the form (set (pseudo-reg) src),
2007 - src is something we want to perform GCSE on,
2008 - none of the operands are subsequently modified in the block
2009
2010 Assignment entries are placed in the hash table if
2011 - they are of the form (set (pseudo-reg) src),
2012 - src is something we want to perform const/copy propagation on,
2013 - none of the operands or target are subsequently modified in the block
2014 Currently src must be a pseudo-reg or a const_int.
2015
2016 F is the first insn.
2017 SET_P is non-zero for computing the assignment hash table. */
2018
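/* For instance (hypothetical RTL, for illustration only):
   (set (reg 50) (plus (reg 51) (reg 52))) is an expression-table
   candidate, provided regs 51 and 52 aren't modified later in the
   block; (set (reg 50) (reg 51)) and (set (reg 50) (const_int 4))
   are assignment-table candidates.  */
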
2019 static void
2020 compute_hash_table (f, set_p)
2021 rtx f ATTRIBUTE_UNUSED;
2022 int set_p;
2023 {
2024 int bb;
2025
2026 /* While we compute the hash table we also compute a bit array of which
2027 registers are set in which blocks.
2028 We also compute which blocks set memory, in the absence of aliasing
2029 support [which is TODO].
2030 ??? This isn't needed during const/copy propagation, but it's cheap to
2031 compute. Later. */
2032 sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2033 bzero ((char *) mem_set_in_block, n_basic_blocks);
2034
2035 /* Some working arrays used to track first and last set in each block. */
2036 /* ??? One could use alloca here, but at some size a threshold is crossed
2037 beyond which one should use malloc. Are we at that threshold here? */
2038 reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2039 reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2040
2041 for (bb = 0; bb < n_basic_blocks; bb++)
2042 {
2043 rtx insn;
2044 int regno;
2045 int in_libcall_block;
2046 int i;
2047
2048 /* First pass over the instructions records information used to
2049 determine when registers and memory are first and last set.
2050 ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2051 could be moved to compute_sets since they currently don't change. */
2052
2053 for (i = 0; i < max_gcse_regno; i++)
2054 reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2055 mem_first_set = NEVER_SET;
2056 mem_last_set = NEVER_SET;
2057
2058 for (insn = basic_block_head[bb];
2059 insn && insn != NEXT_INSN (basic_block_end[bb]);
2060 insn = NEXT_INSN (insn))
2061 {
2062 #ifdef NON_SAVING_SETJMP
2063 if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2064 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2065 {
2066 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2067 record_last_reg_set_info (insn, regno);
2068 continue;
2069 }
2070 #endif
2071
2072 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
2073 continue;
2074
2075 if (GET_CODE (insn) == CALL_INSN)
2076 {
2077 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2078 if (call_used_regs[regno])
2079 record_last_reg_set_info (insn, regno);
2080 if (! CONST_CALL_P (insn))
2081 record_last_mem_set_info (insn);
2082 }
2083
2084 last_set_insn = insn;
2085 note_stores (PATTERN (insn), record_last_set_info);
2086 }
2087
2088 /* The next pass builds the hash table. */
2089
2090 for (insn = basic_block_head[bb], in_libcall_block = 0;
2091 insn && insn != NEXT_INSN (basic_block_end[bb]);
2092 insn = NEXT_INSN (insn))
2093 {
2094 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
2095 {
2096 if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2097 in_libcall_block = 1;
2098 else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2099 in_libcall_block = 0;
2100 hash_scan_insn (insn, set_p, in_libcall_block);
2101 }
2102 }
2103 }
2104
2105 free (reg_first_set);
2106 free (reg_last_set);
2107 /* Catch bugs early. */
2108 reg_first_set = reg_last_set = 0;
2109 }
2110
2111 /* Allocate space for the set hash table.
2112 N_INSNS is the number of instructions in the function.
2113 It is used to determine the number of buckets to use. */
2114
2115 static void
2116 alloc_set_hash_table (n_insns)
2117 int n_insns;
2118 {
2119 int n;
2120
2121 set_hash_table_size = n_insns / 4;
2122 if (set_hash_table_size < 11)
2123 set_hash_table_size = 11;
2124 /* Attempt to maintain efficient use of hash table.
2125 Making it an odd number is simplest for now.
2126 ??? Later take some measurements. */
2127 set_hash_table_size |= 1;
2128 n = set_hash_table_size * sizeof (struct expr *);
2129 set_hash_table = (struct expr **) gmalloc (n);
2130 }
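
/* A minimal worked example of the sizing rule above (hypothetical
   insn counts): a 100-insn function gives 100/4 = 25 buckets, which
   is already >= 11 and odd; a 30-insn function gives 30/4 = 7,
   clamped up to 11.  */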
2131
2132 /* Free things allocated by alloc_set_hash_table. */
2133
2134 static void
2135 free_set_hash_table ()
2136 {
2137 free (set_hash_table);
2138 }
2139
2140 /* Compute the hash table for doing copy/const propagation. */
2141
2142 static void
2143 compute_set_hash_table (f)
2144 rtx f;
2145 {
2146 /* Initialize count of number of entries in hash table. */
2147 n_sets = 0;
2148 bzero ((char *) set_hash_table, set_hash_table_size * sizeof (struct expr *));
2149
2150 compute_hash_table (f, 1);
2151 }
2152
2153 /* Allocate space for the expression hash table.
2154 N_INSNS is the number of instructions in the function.
2155 It is used to determine the number of buckets to use. */
2156
2157 static void
2158 alloc_expr_hash_table (n_insns)
2159 int n_insns;
2160 {
2161 int n;
2162
2163 expr_hash_table_size = n_insns / 2;
2164 /* Make sure the table is not too small. */
2165 if (expr_hash_table_size < 11)
2166 expr_hash_table_size = 11;
2167 /* Attempt to maintain efficient use of hash table.
2168 Making it an odd number is simplest for now.
2169 ??? Later take some measurements. */
2170 expr_hash_table_size |= 1;
2171 n = expr_hash_table_size * sizeof (struct expr *);
2172 expr_hash_table = (struct expr **) gmalloc (n);
2173 }
2174
2175 /* Free things allocated by alloc_expr_hash_table. */
2176
2177 static void
2178 free_expr_hash_table ()
2179 {
2180 free (expr_hash_table);
2181 }
2182
2183 /* Compute the hash table for doing GCSE. */
2184
2185 static void
2186 compute_expr_hash_table (f)
2187 rtx f;
2188 {
2189 /* Initialize count of number of entries in hash table. */
2190 n_exprs = 0;
2191 bzero ((char *) expr_hash_table, expr_hash_table_size * sizeof (struct expr *));
2192
2193 compute_hash_table (f, 0);
2194 }
2195 \f
2196 /* Expression tracking support. */
2197
2198 /* Lookup pattern PAT in the expression table.
2199 The result is a pointer to the table entry, or NULL if not found. */
2200
2201 static struct expr *
2202 lookup_expr (pat)
2203 rtx pat;
2204 {
2205 int do_not_record_p;
2206 unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2207 expr_hash_table_size);
2208 struct expr *expr;
2209
2210 if (do_not_record_p)
2211 return NULL;
2212
2213 expr = expr_hash_table[hash];
2214
2215 while (expr && ! expr_equiv_p (expr->expr, pat))
2216 expr = expr->next_same_hash;
2217
2218 return expr;
2219 }
2220
2221 /* Lookup REGNO in the set table.
2222 If PAT is non-NULL look for the entry that matches it, otherwise return
2223 the first entry for REGNO.
2224 The result is a pointer to the table entry, or NULL if not found. */
2225
2226 static struct expr *
2227 lookup_set (regno, pat)
2228 int regno;
2229 rtx pat;
2230 {
2231 unsigned int hash = hash_set (regno, set_hash_table_size);
2232 struct expr *expr;
2233
2234 expr = set_hash_table[hash];
2235
2236 if (pat)
2237 {
2238 while (expr && ! expr_equiv_p (expr->expr, pat))
2239 expr = expr->next_same_hash;
2240 }
2241 else
2242 {
2243 while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2244 expr = expr->next_same_hash;
2245 }
2246
2247 return expr;
2248 }
2249
2250 /* Return the next entry for REGNO in list EXPR. */
2251
2252 static struct expr *
2253 next_set (regno, expr)
2254 int regno;
2255 struct expr *expr;
2256 {
2257 do
2258 expr = expr->next_same_hash;
2259 while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2260 return expr;
2261 }
2262
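#if 0
/* An illustrative (hypothetical) use of lookup_set and next_set
   together: walk every recorded assignment to pseudo REGNO in the
   set hash table.  This is a sketch only, kept out of the build.  */
static void
walk_sets_of_reg (regno)
     int regno;
{
  struct expr *set;

  for (set = lookup_set (regno, NULL_RTX);
       set != NULL;
       set = next_set (regno, set))
    {
      /* SET->expr is a (set (reg REGNO) src) pattern.  */
    }
}
#endif
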
2263 /* Reset tables used to keep track of what's still available [since the
2264 start of the block]. */
2265
2266 static void
2267 reset_opr_set_tables ()
2268 {
2269 /* Maintain a bitmap of which regs have been set since beginning of
2270 the block. */
2271 sbitmap_zero (reg_set_bitmap);
2272 /* Also keep a record of the last instruction to modify memory.
2273 For now this is very trivial, we only record whether any memory
2274 location has been modified. */
2275 mem_last_set = 0;
2276 }
2277
2278 /* Return non-zero if the operands of X are not set before INSN in
2279 INSN's basic block. */
2280
2281 static int
2282 oprs_not_set_p (x, insn)
2283 rtx x, insn;
2284 {
2285 int i;
2286 enum rtx_code code;
2287 char *fmt;
2288
2289 /* repeat is used to turn tail-recursion into iteration. */
2290 repeat:
2291
2292 if (x == 0)
2293 return 1;
2294
2295 code = GET_CODE (x);
2296 switch (code)
2297 {
2298 case PC:
2299 case CC0:
2300 case CONST:
2301 case CONST_INT:
2302 case CONST_DOUBLE:
2303 case SYMBOL_REF:
2304 case LABEL_REF:
2305 case ADDR_VEC:
2306 case ADDR_DIFF_VEC:
2307 return 1;
2308
2309 case MEM:
2310 if (mem_last_set != 0)
2311 return 0;
2312 x = XEXP (x, 0);
2313 goto repeat;
2314
2315 case REG:
2316 return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2317
2318 default:
2319 break;
2320 }
2321
2322 fmt = GET_RTX_FORMAT (code);
2323 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2324 {
2325 if (fmt[i] == 'e')
2326 {
2327 int not_set_p;
2328 /* If we are about to do the last recursive call
2329 needed at this level, change it into iteration.
2330 This function is called enough to be worth it. */
2331 if (i == 0)
2332 {
2333 x = XEXP (x, 0);
2334 goto repeat;
2335 }
2336 not_set_p = oprs_not_set_p (XEXP (x, i), insn);
2337 if (! not_set_p)
2338 return 0;
2339 }
2340 else if (fmt[i] == 'E')
2341 {
2342 int j;
2343 for (j = 0; j < XVECLEN (x, i); j++)
2344 {
2345 int not_set_p = oprs_not_set_p (XVECEXP (x, i, j), insn);
2346 if (! not_set_p)
2347 return 0;
2348 }
2349 }
2350 }
2351
2352 return 1;
2353 }
2354
2355 /* Mark things set by a CALL. */
2356
2357 static void
2358 mark_call (pat, insn)
2359 rtx pat ATTRIBUTE_UNUSED, insn;
2360 {
2361 mem_last_set = INSN_CUID (insn);
2362 }
2363
2364 /* Mark things set by a SET. */
2365
2366 static void
2367 mark_set (pat, insn)
2368 rtx pat, insn;
2369 {
2370 rtx dest = SET_DEST (pat);
2371
2372 while (GET_CODE (dest) == SUBREG
2373 || GET_CODE (dest) == ZERO_EXTRACT
2374 || GET_CODE (dest) == SIGN_EXTRACT
2375 || GET_CODE (dest) == STRICT_LOW_PART)
2376 dest = XEXP (dest, 0);
2377
2378 if (GET_CODE (dest) == REG)
2379 SET_BIT (reg_set_bitmap, REGNO (dest));
2380 else if (GET_CODE (dest) == MEM)
2381 mem_last_set = INSN_CUID (insn);
2382
2383 if (GET_CODE (SET_SRC (pat)) == CALL)
2384 mark_call (SET_SRC (pat), insn);
2385 }
2386
2387 /* Record things set by a CLOBBER. */
2388
2389 static void
2390 mark_clobber (pat, insn)
2391 rtx pat, insn;
2392 {
2393 rtx clob = XEXP (pat, 0);
2394
2395 while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2396 clob = XEXP (clob, 0);
2397
2398 if (GET_CODE (clob) == REG)
2399 SET_BIT (reg_set_bitmap, REGNO (clob));
2400 else
2401 mem_last_set = INSN_CUID (insn);
2402 }
2403
2404 /* Record things set by INSN.
2405 This data is used by oprs_not_set_p. */
2406
2407 static void
2408 mark_oprs_set (insn)
2409 rtx insn;
2410 {
2411 rtx pat = PATTERN (insn);
2412
2413 if (GET_CODE (pat) == SET)
2414 mark_set (pat, insn);
2415 else if (GET_CODE (pat) == PARALLEL)
2416 {
2417 int i;
2418
2419 for (i = 0; i < XVECLEN (pat, 0); i++)
2420 {
2421 rtx x = XVECEXP (pat, 0, i);
2422
2423 if (GET_CODE (x) == SET)
2424 mark_set (x, insn);
2425 else if (GET_CODE (x) == CLOBBER)
2426 mark_clobber (x, insn);
2427 else if (GET_CODE (x) == CALL)
2428 mark_call (x, insn);
2429 }
2430 }
2431 else if (GET_CODE (pat) == CLOBBER)
2432 mark_clobber (pat, insn);
2433 else if (GET_CODE (pat) == CALL)
2434 mark_call (pat, insn);
2435 }
2436 \f
2437 /* Classic GCSE reaching definition support. */
2438
2439 /* Allocate reaching def variables. */
2440
2441 static void
2442 alloc_rd_mem (n_blocks, n_insns)
2443 int n_blocks, n_insns;
2444 {
2445 rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2446 sbitmap_vector_zero (rd_kill, n_basic_blocks);
2447
2448 rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2449 sbitmap_vector_zero (rd_gen, n_basic_blocks);
2450
2451 reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2452 sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2453
2454 rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2455 sbitmap_vector_zero (rd_out, n_basic_blocks);
2456 }
2457
2458 /* Free reaching def variables. */
2459
2460 static void
2461 free_rd_mem ()
2462 {
2463 free (rd_kill);
2464 free (rd_gen);
2465 free (reaching_defs);
2466 free (rd_out);
2467 }
2468
2469 /* Add INSN to the kills of BB.
2470 REGNO, set in BB, is killed by INSN. */
2471
2472 static void
2473 handle_rd_kill_set (insn, regno, bb)
2474 rtx insn;
2475 int regno, bb;
2476 {
2477 struct reg_set *this_reg = reg_set_table[regno];
2478
2479 while (this_reg)
2480 {
2481 if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2482 SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2483 this_reg = this_reg->next;
2484 }
2485 }
2486
2487 void
2488 dump_rd_table (file, title, bmap)
2489 FILE *file;
2490 char *title;
2491 sbitmap *bmap;
2492 {
2493 int bb, cuid, i, j, n;
2494
2495 fprintf (file, "%s\n", title);
2496 for (bb = 0; bb < n_basic_blocks; bb++)
2497 {
2498 fprintf (file, "BB %d\n", bb);
2499 dump_sbitmap (file, bmap[bb]);
2500 for (i = n = cuid = 0; i < bmap[bb]->size; i++)
2501 {
2502 for (j = 0; j < SBITMAP_ELT_BITS; j++, cuid++)
2503 {
2504 if ((bmap[bb]->elms[i] & (1 << j)) != 0)
2505 {
2506 if (n % 10 == 0)
2507 fprintf (file, " ");
2508 fprintf (file, " %d", INSN_UID (CUID_INSN (cuid)));
2509 n++;
2510 }
2511 }
2512 }
2513 if (n != 0)
2514 fprintf (file, "\n");
2515 }
2516 fprintf (file, "\n");
2517 }
2518
2519 /* Compute the set of kill's for reaching definitions. */
2520
2521 static void
2522 compute_kill_rd ()
2523 {
2524 int bb, cuid;
2525
2526 /* For each block
2527 For each set bit in `gen' of the block (i.e each insn which
2528 generates a definition in the block)
2529 Call the reg set by the insn corresponding to that bit regx
2530 Look at the linked list starting at reg_set_table[regx]
2531 For each setting of regx in the linked list, which is not in
2532 this block
2533 Set the bit in `kill' corresponding to that insn
2534 */
2535
2536 for (bb = 0; bb < n_basic_blocks; bb++)
2537 {
2538 for (cuid = 0; cuid < max_cuid; cuid++)
2539 {
2540 if (TEST_BIT (rd_gen[bb], cuid))
2541 {
2542 rtx insn = CUID_INSN (cuid);
2543 rtx pat = PATTERN (insn);
2544
2545 if (GET_CODE (insn) == CALL_INSN)
2546 {
2547 int regno;
2548
2549 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2550 {
2551 if (call_used_regs[regno])
2552 handle_rd_kill_set (insn, regno, bb);
2553 }
2554 }
2555
2556 if (GET_CODE (pat) == PARALLEL)
2557 {
2558 int i;
2559
2560 /* We work backwards because ... */
2561 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2562 {
2563 enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2564 if ((code == SET || code == CLOBBER)
2565 && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2566 handle_rd_kill_set (insn,
2567 REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2568 bb);
2569 }
2570 }
2571 else if (GET_CODE (pat) == SET)
2572 {
2573 if (GET_CODE (SET_DEST (pat)) == REG)
2574 {
2575 /* Each setting of this register outside of this block
2576 must be marked in the set of kills in this block. */
2577 handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2578 }
2579 }
2580 /* FIXME: CLOBBER? */
2581 }
2582 }
2583 }
2584 }
2585
2586 /* Compute the reaching definitions as in
2587 Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2588 Chapter 10. It is the same algorithm as used for computing available
2589 expressions but applied to the gens and kills of reaching definitions. */
2590
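/* In dataflow terms, the loop below iterates (a sketch, using this
   file's variable names):
     reaching_defs[b] = union over preds p of rd_out[p]
     rd_out[b]        = rd_gen[b] U (reaching_defs[b] - rd_kill[b])
   until no rd_out[b] changes.  */
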
2591 static void
2592 compute_rd ()
2593 {
2594 int bb, changed, passes;
2595
2596 for (bb = 0; bb < n_basic_blocks; bb++)
2597 sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2598
2599 passes = 0;
2600 changed = 1;
2601 while (changed)
2602 {
2603 changed = 0;
2604 for (bb = 0; bb < n_basic_blocks; bb++)
2605 {
2606 sbitmap_union_of_predecessors (reaching_defs[bb], rd_out,
2607 bb, s_preds);
2608 changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2609 reaching_defs[bb], rd_kill[bb]);
2610 }
2611 passes++;
2612 }
2613
2614 if (gcse_file)
2615 fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2616 }
2617 \f
2618 /* Classic GCSE available expression support. */
2619
2620 /* Allocate memory for available expression computation. */
2621
2622 static void
2623 alloc_avail_expr_mem (n_blocks, n_exprs)
2624 int n_blocks, n_exprs;
2625 {
2626 ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2627 sbitmap_vector_zero (ae_kill, n_basic_blocks);
2628
2629 ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2630 sbitmap_vector_zero (ae_gen, n_basic_blocks);
2631
2632 ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2633 sbitmap_vector_zero (ae_in, n_basic_blocks);
2634
2635 ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2636 sbitmap_vector_zero (ae_out, n_basic_blocks);
2637
2638 u_bitmap = (sbitmap) sbitmap_alloc (n_exprs);
2639 sbitmap_ones (u_bitmap);
2640 }
2641
2642 static void
2643 free_avail_expr_mem ()
2644 {
2645 free (ae_kill);
2646 free (ae_gen);
2647 free (ae_in);
2648 free (ae_out);
2649 free (u_bitmap);
2650 }
2651
2652 /* Compute the set of available expressions generated in each basic block. */
2653
2654 static void
2655 compute_ae_gen ()
2656 {
2657 int i;
2658
2659 /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2660 This is all we have to do because an expression is not recorded if it
2661 is not available, and the only expressions we want to work with are the
2662 ones that are recorded. */
2663
2664 for (i = 0; i < expr_hash_table_size; i++)
2665 {
2666 struct expr *expr = expr_hash_table[i];
2667 while (expr != NULL)
2668 {
2669 struct occr *occr = expr->avail_occr;
2670 while (occr != NULL)
2671 {
2672 SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
2673 occr = occr->next;
2674 }
2675 expr = expr->next_same_hash;
2676 }
2677 }
2678 }
2679
2680 /* Return non-zero if expression X is killed in BB. */
2681
2682 static int
2683 expr_killed_p (x, bb)
2684 rtx x;
2685 int bb;
2686 {
2687 int i;
2688 enum rtx_code code;
2689 char *fmt;
2690
2691 /* repeat is used to turn tail-recursion into iteration. */
2692 repeat:
2693
2694 if (x == 0)
2695 return 1;
2696
2697 code = GET_CODE (x);
2698 switch (code)
2699 {
2700 case REG:
2701 return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2702
2703 case MEM:
2704 if (mem_set_in_block[bb])
2705 return 1;
2706 x = XEXP (x, 0);
2707 goto repeat;
2708
2709 case PC:
2710 case CC0: /*FIXME*/
2711 case CONST:
2712 case CONST_INT:
2713 case CONST_DOUBLE:
2714 case SYMBOL_REF:
2715 case LABEL_REF:
2716 case ADDR_VEC:
2717 case ADDR_DIFF_VEC:
2718 return 0;
2719
2720 default:
2721 break;
2722 }
2723
2724 i = GET_RTX_LENGTH (code) - 1;
2725 fmt = GET_RTX_FORMAT (code);
2726 for (; i >= 0; i--)
2727 {
2728 if (fmt[i] == 'e')
2729 {
2730 rtx tem = XEXP (x, i);
2731
2732 /* If we are about to do the last recursive call
2733 needed at this level, change it into iteration.
2734 This function is called enough to be worth it. */
2735 if (i == 0)
2736 {
2737 x = tem;
2738 goto repeat;
2739 }
2740 if (expr_killed_p (tem, bb))
2741 return 1;
2742 }
2743 else if (fmt[i] == 'E')
2744 {
2745 int j;
2746 for (j = 0; j < XVECLEN (x, i); j++)
2747 {
2748 if (expr_killed_p (XVECEXP (x, i, j), bb))
2749 return 1;
2750 }
2751 }
2752 }
2753
2754 return 0;
2755 }
2756
2757 /* Compute the set of available expressions killed in each basic block. */
2758
2759 static void
2760 compute_ae_kill ()
2761 {
2762 int bb, i;
2763
2764 for (bb = 0; bb < n_basic_blocks; bb++)
2765 {
2766 for (i = 0; i < expr_hash_table_size; i++)
2767 {
2768 struct expr *expr = expr_hash_table[i];
2769
2770 for ( ; expr != NULL; expr = expr->next_same_hash)
2771 {
2772 /* Skip EXPR if generated in this block. */
2773 if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2774 continue;
2775
2776 if (expr_killed_p (expr->expr, bb))
2777 SET_BIT (ae_kill[bb], expr->bitmap_index);
2778 }
2779 }
2780 }
2781 }
2782
2783 /* Compute available expressions.
2784
2785 Implement the algorithm to find available expressions
2786 as given in the Aho Sethi Ullman book, pages 627-631. */
2787
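/* A sketch of the equations solved below, in this file's names:
     ae_in[b]  = intersection over preds p of ae_out[p]   (b != 0)
     ae_out[b] = ae_gen[b] U (ae_in[b] - ae_kill[b])
   with ae_in[0] empty, iterated until no ae_out[b] changes.  */
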
2788 static void
2789 compute_available ()
2790 {
2791 int bb, changed, passes;
2792
2793 sbitmap_zero (ae_in[0]);
2794
2795 sbitmap_copy (ae_out[0] /*dst*/, ae_gen[0] /*src*/);
2796
2797 for (bb = 1; bb < n_basic_blocks; bb++)
2798 sbitmap_difference (ae_out[bb], u_bitmap, ae_kill[bb]);
2799
2800 passes = 0;
2801 changed = 1;
2802 while (changed)
2803 {
2804 changed = 0;
2805 for (bb = 1; bb < n_basic_blocks; bb++)
2806 {
2807 sbitmap_intersect_of_predecessors (ae_in[bb], ae_out,
2808 bb, s_preds);
2809 changed |= sbitmap_union_of_diff (ae_out[bb], ae_gen[bb],
2810 ae_in[bb], ae_kill[bb]);
2811 }
2812 passes++;
2813 }
2814
2815 if (gcse_file)
2816 fprintf (gcse_file, "avail expr computation: %d passes\n", passes);
2817 }
2818 \f
2819 /* Actually perform the Classic GCSE optimizations. */
2820
2821 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2822
2823 CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2824 as a positive reach. We want to do this when there are two computations
2825 of the expression in the block.
2826
2827 VISITED is a pointer to a working buffer for tracking which BB's have
2828 been visited. It is NULL for the top-level call.
2829
2830 We treat reaching expressions that go through blocks containing the same
2831 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2832 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2833 2 as not reaching. The intent is to improve the probability of finding
2834 only one reaching expression and to reduce register lifetimes by picking
2835 the closest such expression. */
2836
2837 static int
2838 expr_reaches_here_p (occr, expr, bb, check_self_loop, visited)
2839 struct occr *occr;
2840 struct expr *expr;
2841 int bb;
2842 int check_self_loop;
2843 char *visited;
2844 {
2845 int_list_ptr pred;
2846
2847 if (visited == NULL)
2848 {
2849 visited = (char *) alloca (n_basic_blocks);
2850 bzero (visited, n_basic_blocks);
2851 }
2852
2853 for (pred = s_preds[bb]; pred != NULL; pred = pred->next)
2854 {
2855 int pred_bb = INT_LIST_VAL (pred);
2856
2857 if (visited[pred_bb])
2858 {
2859 /* This predecessor has already been visited.
2860 Nothing to do. */
2861 ;
2862 }
2863 else if (pred_bb == bb)
2864 {
2865 /* BB loops on itself. */
2866 if (check_self_loop
2867 && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2868 && BLOCK_NUM (occr->insn) == pred_bb)
2869 return 1;
2870 visited[pred_bb] = 1;
2871 }
2872 /* Ignore this predecessor if it kills the expression. */
2873 else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2874 visited[pred_bb] = 1;
2875 /* Does this predecessor generate this expression? */
2876 else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2877 {
2878 /* Is this the occurrence we're looking for?
2879 Note that there's only one generating occurrence per block
2880 so we just need to check the block number. */
2881 if (BLOCK_NUM (occr->insn) == pred_bb)
2882 return 1;
2883 visited[pred_bb] = 1;
2884 }
2885 /* Neither gen nor kill. */
2886 else
2887 {
2888 visited[pred_bb] = 1;
2889 if (expr_reaches_here_p (occr, expr, pred_bb, check_self_loop, visited))
2890 return 1;
2891 }
2892 }
2893
2894 /* All paths have been checked. */
2895 return 0;
2896 }
2897
2898 /* Return the instruction that computes EXPR and reaches INSN's basic block.
2899 If there is more than one such instruction, return NULL.
2900
2901 Called only by handle_avail_expr. */
2902
2903 static rtx
2904 computing_insn (expr, insn)
2905 struct expr *expr;
2906 rtx insn;
2907 {
2908 int bb = BLOCK_NUM (insn);
2909
2910 if (expr->avail_occr->next == NULL)
2911 {
2912 if (BLOCK_NUM (expr->avail_occr->insn) == bb)
2913 {
2914 /* The available expression is actually itself
2915 (i.e. a loop in the flow graph) so do nothing. */
2916 return NULL;
2917 }
2918 /* (FIXME) Case where we found a pattern that was created by
2919 a substitution that took place. */
2920 return expr->avail_occr->insn;
2921 }
2922 else
2923 {
2924 /* Pattern is computed more than once.
2925 Search backwards from this insn to see how many of these
2926 computations actually reach this insn. */
2927 struct occr *occr;
2928 rtx insn_computes_expr = NULL;
2929 int can_reach = 0;
2930
2931 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
2932 {
2933 if (BLOCK_NUM (occr->insn) == bb)
2934 {
2935 /* The expression is generated in this block.
2936 The only time we care about this is when the expression
2937 is generated later in the block [and thus there's a loop].
2938 We let the normal cse pass handle the other cases. */
2939 if (INSN_CUID (insn) < INSN_CUID (occr->insn))
2940 {
2941 if (expr_reaches_here_p (occr, expr, bb, 1, NULL))
2942 {
2943 can_reach++;
2944 if (can_reach > 1)
2945 return NULL;
2946 insn_computes_expr = occr->insn;
2947 }
2948 }
2949 }
2950 else /* Computation of the pattern outside this block. */
2951 {
2952 if (expr_reaches_here_p (occr, expr, bb, 0, NULL))
2953 {
2954 can_reach++;
2955 if (can_reach > 1)
2956 return NULL;
2957 insn_computes_expr = occr->insn;
2958 }
2959 }
2960 }
2961
2962 if (insn_computes_expr == NULL)
2963 abort ();
2964 return insn_computes_expr;
2965 }
2966 }
2967
2968 /* Return non-zero if the definition in DEF_INSN can reach INSN.
2969 Only called by can_disregard_other_sets. */
2970
2971 static int
2972 def_reaches_here_p (insn, def_insn)
2973 rtx insn, def_insn;
2974 {
2975 rtx reg;
2976
2977 if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
2978 return 1;
2979
2980 if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
2981 {
2982 if (INSN_CUID (def_insn) < INSN_CUID (insn))
2983 {
2984 if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
2985 return 1;
2986 if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
2987 reg = XEXP (PATTERN (def_insn), 0);
2988 else if (GET_CODE (PATTERN (def_insn)) == SET)
2989 reg = SET_DEST (PATTERN (def_insn));
2990 else
2991 abort ();
2992 return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
2993 }
2994 else
2995 return 0;
2996 }
2997
2998 return 0;
2999 }
3000
3001 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN.
3002 The value returned is the number of definitions that reach INSN.
3003 Returning a value of zero means that [maybe] more than one definition
3004 reaches INSN, so the caller can't perform whatever optimization it is
3005 trying; i.e. it is always safe to return zero. */
3006
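/* For example (hypothetical): if two definitions of pseudo 60 reach
   INSN and FOR_COMBINE is zero, and both definitions assign the same
   (rtx_equal_p) source as INSN's own SET, the function returns 2; if
   either reaching definition assigns something different, it
   returns 0.  */
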
3007 static int
3008 can_disregard_other_sets (addr_this_reg, insn, for_combine)
3009 struct reg_set **addr_this_reg;
3010 rtx insn;
3011 int for_combine;
3012 {
3013 int number_of_reaching_defs = 0;
3014 struct reg_set *this_reg = *addr_this_reg;
3015
3016 while (this_reg)
3017 {
3018 if (def_reaches_here_p (insn, this_reg->insn))
3019 {
3020 number_of_reaching_defs++;
3021 /* Ignore parallels for now. */
3022 if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
3023 return 0;
3024 if (!for_combine
3025 && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3026 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3027 SET_SRC (PATTERN (insn)))))
3028 {
3029 /* A setting of the reg to a different value reaches INSN. */
3030 return 0;
3031 }
3032 if (number_of_reaching_defs > 1)
3033 {
3034 /* If in this setting the value the register is being
3035 set to is equal to the previous value the register
3036 was set to and this setting reaches the insn we are
3037 trying to do the substitution on then we are ok. */
3038
3039 if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3040 return 0;
3041 if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3042 SET_SRC (PATTERN (insn))))
3043 return 0;
3044 }
3045 *addr_this_reg = this_reg;
3046 }
3047
3048 /* prev_this_reg = this_reg; */
3049 this_reg = this_reg->next;
3050 }
3051
3052 return number_of_reaching_defs;
3053 }
3054
3055 /* Expression computed by insn is available and the substitution is legal,
3056 so try to perform the substitution.
3057
3058 The result is non-zero if any changes were made. */
3059
3060 static int
3061 handle_avail_expr (insn, expr)
3062 rtx insn;
3063 struct expr *expr;
3064 {
3065 rtx pat, insn_computes_expr;
3066 rtx to;
3067 struct reg_set *this_reg;
3068 int found_setting, use_src;
3069 int changed = 0;
3070
3071 /* We only handle the case where one computation of the expression
3072 reaches this instruction. */
3073 insn_computes_expr = computing_insn (expr, insn);
3074 if (insn_computes_expr == NULL)
3075 return 0;
3076
3077 found_setting = 0;
3078 use_src = 0;
3079
3080 /* At this point we know only one computation of EXPR outside of this
3081 block reaches this insn. Now try to find a register that the
3082 expression is computed into. */
3083
3084 if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3085 {
3086 /* This is the case when the available expression that reaches
3087 here has already been handled as an available expression. */
3088 int regnum_for_replacing = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3089 /* If the register was created by GCSE we can't use `reg_set_table';
3090 however, we know it's set only once. */
3091 if (regnum_for_replacing >= max_gcse_regno
3092 /* If the register the expression is computed into is set only once,
3093 or only one set reaches this insn, we can use it. */
3094 || (((this_reg = reg_set_table[regnum_for_replacing]),
3095 this_reg->next == NULL)
3096 || can_disregard_other_sets (&this_reg, insn, 0)))
3097 {
3098 use_src = 1;
3099 found_setting = 1;
3100 }
3101 }
3102
3103 if (!found_setting)
3104 {
3105 int regnum_for_replacing = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3106 /* This shouldn't happen. */
3107 if (regnum_for_replacing >= max_gcse_regno)
3108 abort ();
3109 this_reg = reg_set_table[regnum_for_replacing];
3110 /* If the register the expression is computed into is set only once,
3111 or only one set reaches this insn, use it. */
3112 if (this_reg->next == NULL
3113 || can_disregard_other_sets (&this_reg, insn, 0))
3114 found_setting = 1;
3115 }
3116
3117 if (found_setting)
3118 {
3119 pat = PATTERN (insn);
3120 if (use_src)
3121 to = SET_SRC (PATTERN (insn_computes_expr));
3122 else
3123 to = SET_DEST (PATTERN (insn_computes_expr));
3124 changed = validate_change (insn, &SET_SRC (pat), to, 0);
3125
3126 /* We should be able to ignore the return code from validate_change but
3127 to play it safe we check. */
3128 if (changed)
3129 {
3130 gcse_subst_count++;
3131 if (gcse_file != NULL)
3132 {
3133 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d %s insn %d\n",
3134 INSN_UID (insn), REGNO (to),
3135 use_src ? "from" : "set in",
3136 INSN_UID (insn_computes_expr));
3137 }
3138
3139 }
3140 }
3141 /* The register that the expr is computed into is set more than once. */
3142 else if (1 /*expensive_op(this_pattern->op) && do_expensive_gcse)*/)
3143 {
3144 /* Insert an insn after the insn computing the expression; it copies
3145 the reg set there (call it REGB) into a new pseudo register REGN.
3146 From that insn until the end of the basic block, or until REGB is
3147 set again, replace all uses of REGB with REGN. */
3148 rtx new_insn;
3149
3150 to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3151
3152 /* Generate the new insn. */
3153 /* ??? If the change fails, we return 0, even though we created
3154 an insn. I think this is ok. */
3155 new_insn
3156 = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3157 SET_DEST (PATTERN (insn_computes_expr))),
3158 insn_computes_expr);
3159 /* Keep block number table up to date. */
3160 set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3161 /* Keep register set table up to date. */
3162 record_one_set (REGNO (to), new_insn);
3163
3164 gcse_create_count++;
3165 if (gcse_file != NULL)
3166 {
3167 fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d, computed in insn %d,\n",
3168 INSN_UID (NEXT_INSN (insn_computes_expr)),
3169 REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))),
3170 INSN_UID (insn_computes_expr));
3171 fprintf (gcse_file, " into newly allocated reg %d\n", REGNO (to));
3172 }
3173
3174 pat = PATTERN (insn);
3175
3176 /* Do register replacement for INSN. */
3177 changed = validate_change (insn, &SET_SRC (pat),
3178 SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr))),
3179 0);
3180
3181 /* We should be able to ignore the return code from validate_change but
3182 to play it safe we check. */
3183 if (changed)
3184 {
3185 gcse_subst_count++;
3186 if (gcse_file != NULL)
3187 {
3188 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d set in insn %d\n",
3189 INSN_UID (insn),
3190 REGNO (SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr)))),
3191 INSN_UID (insn_computes_expr));
3192 }
3193
3194 }
3195 }
3196
3197 return changed;
3198 }
3199
3200 /* Perform classic GCSE.
3201 This is called by one_classic_gcse_pass after all the dataflow analysis
3202 has been done.
3203
3204 The result is non-zero if a change was made. */
3205
3206 static int
3207 classic_gcse ()
3208 {
3209 int bb, changed;
3210 rtx insn;
3211
3212 /* Note we start at block 1. */
3213
3214 changed = 0;
3215 for (bb = 1; bb < n_basic_blocks; bb++)
3216 {
3217 /* Reset tables used to keep track of what's still valid [since the
3218 start of the block]. */
3219 reset_opr_set_tables ();
3220
3221 for (insn = basic_block_head[bb];
3222 insn != NULL && insn != NEXT_INSN (basic_block_end[bb]);
3223 insn = NEXT_INSN (insn))
3224 {
3225 /* Is insn of form (set (pseudo-reg) ...)? */
3226
3227 if (GET_CODE (insn) == INSN
3228 && GET_CODE (PATTERN (insn)) == SET
3229 && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3230 && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3231 {
3232 rtx pat = PATTERN (insn);
3233 rtx src = SET_SRC (pat);
3234 struct expr *expr;
3235
3236 if (want_to_gcse_p (src)
3237 /* Is the expression recorded? */
3238 && ((expr = lookup_expr (src)) != NULL)
3239 /* Is the expression available [at the start of the
3240 block]? */
3241 && TEST_BIT (ae_in[bb], expr->bitmap_index)
3242 /* Are the operands unchanged since the start of the
3243 block? */
3244 && oprs_not_set_p (src, insn))
3245 changed |= handle_avail_expr (insn, expr);
3246 }
3247
3248 /* Keep track of everything modified by this insn. */
3249 /* ??? Need to be careful w.r.t. mods done to INSN. */
3250 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3251 mark_oprs_set (insn);
3252 }
3253 }
3254
3255 return changed;
3256 }
3257
3258 /* Top level routine to perform one classic GCSE pass.
3259
3260 Return non-zero if a change was made. */
3261
3262 static int
3263 one_classic_gcse_pass (f, pass)
3264 rtx f;
3265 int pass;
3266 {
3267 int changed = 0;
3268
3269 gcse_subst_count = 0;
3270 gcse_create_count = 0;
3271
3272 alloc_expr_hash_table (max_cuid);
3273 alloc_rd_mem (n_basic_blocks, max_cuid);
3274 compute_expr_hash_table (f);
3275 if (gcse_file)
3276 dump_hash_table (gcse_file, "Expression", expr_hash_table,
3277 expr_hash_table_size, n_exprs);
3278 if (n_exprs > 0)
3279 {
3280 compute_kill_rd ();
3281 compute_rd ();
3282 alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3283 compute_ae_gen ();
3284 compute_ae_kill ();
3285 compute_available ();
3286 changed = classic_gcse ();
3287 free_avail_expr_mem ();
3288 }
3289 free_rd_mem ();
3290 free_expr_hash_table ();
3291
3292 if (gcse_file)
3293 {
3294 fprintf (gcse_file, "\n");
3295 fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
3296 current_function_name, pass,
3297 bytes_used, gcse_subst_count, gcse_create_count);
3298 }
3299
3300 return changed;
3301 }
3302 \f
3303 /* Compute copy/constant propagation working variables. */
3304
3305 /* Local properties of assignments. */
3306
3307 static sbitmap *cprop_pavloc;
3308 static sbitmap *cprop_absaltered;
3309
3310 /* Global properties of assignments (computed from the local properties). */
3311
3312 static sbitmap *cprop_avin;
3313 static sbitmap *cprop_avout;
3314
3315 /* Allocate vars used for copy/const propagation.
3316 N_BLOCKS is the number of basic blocks.
3317 N_SETS is the number of sets. */
3318
3319 static void
3320 alloc_cprop_mem (n_blocks, n_sets)
3321 int n_blocks, n_sets;
3322 {
3323 cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3324 cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3325
3326 cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3327 cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3328 }
3329
3330 /* Free vars used by copy/const propagation. */
3331
3332 static void
3333 free_cprop_mem ()
3334 {
3335 free (cprop_pavloc);
3336 free (cprop_absaltered);
3337 free (cprop_avin);
3338 free (cprop_avout);
3339 }
3340
3341 /* Dump copy/const propagation data. */
3342
3343 void
3344 dump_cprop_data (file)
3345 FILE *file;
3346 {
3347 dump_sbitmap_vector (file, "CPROP partially locally available sets", "BB",
3348 cprop_pavloc, n_basic_blocks);
3349 dump_sbitmap_vector (file, "CPROP absolutely altered sets", "BB",
3350 cprop_absaltered, n_basic_blocks);
3351
3352 dump_sbitmap_vector (file, "CPROP available incoming sets", "BB",
3353 cprop_avin, n_basic_blocks);
3354 dump_sbitmap_vector (file, "CPROP available outgoing sets", "BB",
3355 cprop_avout, n_basic_blocks);
3356 }
3357
3358 /* For each block, compute whether X is transparent.
3359 X is either an expression or an assignment [though we don't care which,
3360 for this context an assignment is treated as an expression].
3361 For each block where an element of X is modified, set (SET_P == 1) or reset
3362 (SET_P == 0) the INDX bit in BMAP. */
3363
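/* For example (hypothetical): if X is (plus (reg 40) (reg 41)) and
   pseudo 41 is set in blocks 2 and 5, then with SET_P == 1 the INDX
   bit is set in bmap[2] and bmap[5]; with SET_P == 0 it is reset
   there instead.  */
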
3364 static void
3365 compute_transp (x, indx, bmap, set_p)
3366 rtx x;
3367 int indx;
3368 sbitmap *bmap;
3369 int set_p;
3370 {
3371 int bb, i;
3372 enum rtx_code code;
3373 char *fmt;
3374
3375 /* repeat is used to turn tail-recursion into iteration. */
3376 repeat:
3377
3378 if (x == 0)
3379 return;
3380
3381 code = GET_CODE (x);
3382 switch (code)
3383 {
3384 case REG:
3385 {
3386 reg_set *r;
3387 int regno = REGNO (x);
3388
3389 if (set_p)
3390 {
3391 if (regno < FIRST_PSEUDO_REGISTER)
3392 {
3393 for (bb = 0; bb < n_basic_blocks; bb++)
3394 if (TEST_BIT (reg_set_in_block[bb], regno))
3395 SET_BIT (bmap[bb], indx);
3396 }
3397 else
3398 {
3399 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3400 {
3401 bb = BLOCK_NUM (r->insn);
3402 SET_BIT (bmap[bb], indx);
3403 }
3404 }
3405 }
3406 else
3407 {
3408 if (regno < FIRST_PSEUDO_REGISTER)
3409 {
3410 for (bb = 0; bb < n_basic_blocks; bb++)
3411 if (TEST_BIT (reg_set_in_block[bb], regno))
3412 RESET_BIT (bmap[bb], indx);
3413 }
3414 else
3415 {
3416 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3417 {
3418 bb = BLOCK_NUM (r->insn);
3419 RESET_BIT (bmap[bb], indx);
3420 }
3421 }
3422 }
3423 return;
3424 }
3425
3426 case MEM:
3427 if (set_p)
3428 {
3429 for (bb = 0; bb < n_basic_blocks; bb++)
3430 if (mem_set_in_block[bb])
3431 SET_BIT (bmap[bb], indx);
3432 }
3433 else
3434 {
3435 for (bb = 0; bb < n_basic_blocks; bb++)
3436 if (mem_set_in_block[bb])
3437 RESET_BIT (bmap[bb], indx);
3438 }
3439 x = XEXP (x, 0);
3440 goto repeat;
3441
3442 case PC:
3443 case CC0: /*FIXME*/
3444 case CONST:
3445 case CONST_INT:
3446 case CONST_DOUBLE:
3447 case SYMBOL_REF:
3448 case LABEL_REF:
3449 case ADDR_VEC:
3450 case ADDR_DIFF_VEC:
3451 return;
3452
3453 default:
3454 break;
3455 }
3456
3457 i = GET_RTX_LENGTH (code) - 1;
3458 fmt = GET_RTX_FORMAT (code);
3459 for (; i >= 0; i--)
3460 {
3461 if (fmt[i] == 'e')
3462 {
3463 rtx tem = XEXP (x, i);
3464
3465 /* If we are about to do the last recursive call
3466 needed at this level, change it into iteration.
3467 This function is called enough to be worth it. */
3468 if (i == 0)
3469 {
3470 x = tem;
3471 goto repeat;
3472 }
3473 compute_transp (tem, indx, bmap, set_p);
3474 }
3475 else if (fmt[i] == 'E')
3476 {
3477 int j;
3478 for (j = 0; j < XVECLEN (x, i); j++)
3479 compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3480 }
3481 }
3482 }
3483
3484 static void
3485 compute_cprop_local_properties ()
3486 {
3487 int i;
3488
3489 sbitmap_vector_zero (cprop_absaltered, n_basic_blocks);
3490 sbitmap_vector_zero (cprop_pavloc, n_basic_blocks);
3491
3492 for (i = 0; i < set_hash_table_size; i++)
3493 {
3494 struct expr *expr;
3495
3496 for (expr = set_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
3497 {
3498 struct occr *occr;
3499 int indx = expr->bitmap_index;
3500
3501 /* The assignment is absolutely altered if any operand is modified
3502 by this block [excluding the assignment itself].
3503 We start by assuming all are transparent [none are killed], and
3504 then setting the bits for those that are. */
3505
3506 compute_transp (expr->expr, indx, cprop_absaltered, 1);
3507
3508 /* The occurrences recorded in avail_occr are exactly those that
3509 we want to set to non-zero in PAVLOC. */
3510
3511 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
3512 {
3513 int bb = BLOCK_NUM (occr->insn);
3514 SET_BIT (cprop_pavloc[bb], indx);
3515 }
3516 }
3517 }
3518 }
3519
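/* A sketch of the availability equations iterated by
   compute_cprop_avinout below (the same framework as
   compute_available):
     cprop_avin[b]  = intersection over preds p of cprop_avout[p]  (b != 0)
     cprop_avout[b] = cprop_pavloc[b] U (cprop_avin[b] - cprop_absaltered[b])
   with cprop_avin[0] empty.  */
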
3520 static void
3521 compute_cprop_avinout ()
3522 {
3523 int bb, changed, passes;
3524
3525 sbitmap_zero (cprop_avin[0]);
3526 sbitmap_vector_ones (cprop_avout, n_basic_blocks);
3527
3528 passes = 0;
3529 changed = 1;
3530 while (changed)
3531 {
3532 changed = 0;
3533 for (bb = 0; bb < n_basic_blocks; bb++)
3534 {
3535 if (bb != 0)
3536 sbitmap_intersect_of_predecessors (cprop_avin[bb], cprop_avout,
3537 bb, s_preds);
3538 changed |= sbitmap_union_of_diff (cprop_avout[bb],
3539 cprop_pavloc[bb],
3540 cprop_avin[bb],
3541 cprop_absaltered[bb]);
3542 }
3543 passes++;
3544 }
3545
3546 if (gcse_file)
3547 fprintf (gcse_file, "cprop avail expr computation: %d passes\n", passes);
3548 }
3549
3550 /* Top level routine to do the dataflow analysis needed by copy/const
3551 propagation. */
3552
3553 static void
3554 compute_cprop_data ()
3555 {
3556 compute_cprop_local_properties ();
3557 compute_cprop_avinout ();
3558 }
3559 \f
3560 /* Copy/constant propagation. */
3561
3562 struct reg_use {
3563 rtx reg_rtx;
3564 };
3565
3566 /* Maximum number of register uses in an insn that we handle. */
3567 #define MAX_USES 8
3568
3569 /* Table of uses found in an insn.
3570 Allocated statically to avoid alloc/free complexity and overhead. */
3571 static struct reg_use reg_use_table[MAX_USES];
3572
3573 /* Index into `reg_use_table' while building it. */
3574 static int reg_use_count;
3575
3576 /* Set up a list of register numbers used in INSN.
3577 The found uses are stored in `reg_use_table'.
3578 `reg_use_count' is initialized to zero before entry, and
3579 contains the number of uses in the table upon exit.
3580
3581 ??? If a register appears multiple times we will record it multiple
3582 times. This doesn't hurt anything, but it will slow things down. */
3583
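#if 0
/* An illustrative (hypothetical) caller protocol for find_used_regs,
   mirroring what cprop_insn does below: clear the count, scan the
   pattern, then walk reg_use_table[0 .. reg_use_count-1].  The
   note_use call is a placeholder; this sketch is kept out of the
   build.  */
reg_use_count = 0;
find_used_regs (PATTERN (insn));
for (i = 0; i < reg_use_count; i++)
  note_use (reg_use_table[i].reg_rtx);
#endif
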
3584 static void
3585 find_used_regs (x)
3586 rtx x;
3587 {
3588 int i;
3589 enum rtx_code code;
3590 char *fmt;
3591
3592 /* repeat is used to turn tail-recursion into iteration. */
3593 repeat:
3594
3595 if (x == 0)
3596 return;
3597
3598 code = GET_CODE (x);
3599 switch (code)
3600 {
3601 case REG:
3602 if (reg_use_count == MAX_USES)
3603 return;
3604 reg_use_table[reg_use_count].reg_rtx = x;
3605 reg_use_count++;
3606 return;
3607
3608 case MEM:
3609 x = XEXP (x, 0);
3610 goto repeat;
3611
3612 case PC:
3613 case CC0:
3614 case CONST:
3615 case CONST_INT:
3616 case CONST_DOUBLE:
3617 case SYMBOL_REF:
3618 case LABEL_REF:
3619 case CLOBBER:
3620 case ADDR_VEC:
3621 case ADDR_DIFF_VEC:
3622 case ASM_INPUT: /*FIXME*/
3623 return;
3624
3625 case SET:
3626 if (GET_CODE (SET_DEST (x)) == MEM)
3627 find_used_regs (SET_DEST (x));
3628 x = SET_SRC (x);
3629 goto repeat;
3630
3631 default:
3632 break;
3633 }
3634
3635 /* Recursively scan the operands of this expression. */
3636
3637 fmt = GET_RTX_FORMAT (code);
3638 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3639 {
3640 if (fmt[i] == 'e')
3641 {
3642 /* If we are about to do the last recursive call
3643 needed at this level, change it into iteration.
3644 This function is called enough to be worth it. */
3645 if (i == 0)
3646 {
3647 x = XEXP (x, 0);
3648 goto repeat;
3649 }
3650 find_used_regs (XEXP (x, i));
3651 }
3652 else if (fmt[i] == 'E')
3653 {
3654 int j;
3655 for (j = 0; j < XVECLEN (x, i); j++)
3656 find_used_regs (XVECEXP (x, i, j));
3657 }
3658 }
3659 }
3660
3661 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3662 Returns non-zero if successful. */
3663
3664 static int
3665 try_replace_reg (from, to, insn)
3666 rtx from, to, insn;
3667 {
3668 return validate_replace_src (from, to, insn);
3669 }
3670
3671 /* Find a set of REGNO that is available on entry to INSN's block.
3672 Returns NULL if not found. */
3673
3674 static struct expr *
3675 find_avail_set (regno, insn)
3676 int regno;
3677 rtx insn;
3678 {
3679 struct expr *set = lookup_set (regno, NULL_RTX);
3680
3681 while (set)
3682 {
3683 if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3684 break;
3685 set = next_set (regno, set);
3686 }
3687
3688 return set;
3689 }
3690
3691 /* Perform constant and copy propagation on INSN.
3692 The result is non-zero if a change was made. */
3693
3694 static int
3695 cprop_insn (insn)
3696 rtx insn;
3697 {
3698 struct reg_use *reg_used;
3699 int changed = 0;
3700
3701 /* ??? For now only propagate into SETs. */
3702 if (GET_CODE (insn) != INSN
3703 || GET_CODE (PATTERN (insn)) != SET)
3704 return 0;
3705
3706 reg_use_count = 0;
3707 find_used_regs (PATTERN (insn));
3708
3709 reg_used = &reg_use_table[0];
3710 for ( ; reg_use_count > 0; reg_used++, reg_use_count--)
3711 {
3712 rtx pat, src;
3713 struct expr *set;
3714 int regno = REGNO (reg_used->reg_rtx);
3715
3716 /* Ignore registers created by GCSE.
3717 We do this because ... */
3718 if (regno >= max_gcse_regno)
3719 continue;
3720
3721 /* If the register has already been set in this block, there's
3722 nothing we can do. */
3723 if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3724 continue;
3725
3726 /* Find an assignment that sets reg_used and is available
3727 at the start of the block. */
3728 set = find_avail_set (regno, insn);
3729 if (! set)
3730 continue;
3731
3732 pat = set->expr;
3733 /* ??? We might be able to handle PARALLELs. Later. */
3734 if (GET_CODE (pat) != SET)
3735 abort ();
3736 src = SET_SRC (pat);
3737
3738 if (GET_CODE (src) == CONST_INT)
3739 {
3740 if (try_replace_reg (reg_used->reg_rtx, src, insn))
3741 {
3742 changed = 1;
3743 const_prop_count++;
3744 if (gcse_file != NULL)
3745 {
3746 fprintf (gcse_file, "CONST-PROP: Replacing reg %d in insn %d with constant ",
3747 regno, INSN_UID (insn));
3748 fprintf (gcse_file, HOST_WIDE_INT_PRINT_DEC, INTVAL (src));
3749 fprintf (gcse_file, "\n");
3750 }
3751
3752 /* The original insn setting reg_used may or may not now be
3753 deletable. We leave the deletion to flow. */
3754 }
3755 }
3756 else if (GET_CODE (src) == REG
3757 && REGNO (src) >= FIRST_PSEUDO_REGISTER
3758 && REGNO (src) != regno)
3759 {
3760 /* We know the set is available.
3761 Now check that SET_SRC is ANTLOC (i.e. none of the source operands
3762 have changed since the start of the block). */
3763 if (oprs_not_set_p (src, insn))
3764 {
3765 if (try_replace_reg (reg_used->reg_rtx, src, insn))
3766 {
3767 changed = 1;
3768 copy_prop_count++;
3769 if (gcse_file != NULL)
3770 {
3771 fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d with reg %d\n",
3772 regno, INSN_UID (insn), REGNO (src));
3773 }
3774
3775 /* The original insn setting reg_used may or may not now be
3776 deletable. We leave the deletion to flow. */
3777 /* FIXME: If it turns out that the insn isn't deletable,
3778 then we may have unnecessarily extended register lifetimes
3779 and made things worse. */
3780 }
3781 }
3782 }
3783 }
3784
3785 return changed;
3786 }
3787
3788 /* Forward propagate copies.
3789 This includes copies and constants.
3790 Return non-zero if a change was made. */
3791
3792 static int
3793 cprop ()
3794 {
3795 int bb, changed;
3796 rtx insn;
3797
3798 /* Note we start at block 1. */
3799
3800 changed = 0;
3801 for (bb = 1; bb < n_basic_blocks; bb++)
3802 {
3803 /* Reset tables used to keep track of what's still valid [since the
3804 start of the block]. */
3805 reset_opr_set_tables ();
3806
3807 for (insn = basic_block_head[bb];
3808 insn != NULL && insn != NEXT_INSN (basic_block_end[bb]);
3809 insn = NEXT_INSN (insn))
3810 {
3811 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3812 {
3813 changed |= cprop_insn (insn);
3814
3815 /* Keep track of everything modified by this insn. */
3816 /* ??? Need to be careful w.r.t. mods done to INSN. */
3817 mark_oprs_set (insn);
3818 }
3819 }
3820 }
3821
3822 if (gcse_file != NULL)
3823 fprintf (gcse_file, "\n");
3824
3825 return changed;
3826 }
3827
3828 /* Perform one copy/constant propagation pass.
3829 F is the first insn in the function.
3830 PASS is the pass count. */
3831
3832 static int
3833 one_cprop_pass (f, pass)
3834 rtx f;
3835 int pass;
3836 {
3837 int changed = 0;
3838
3839 const_prop_count = 0;
3840 copy_prop_count = 0;
3841
3842 alloc_set_hash_table (max_cuid);
3843 compute_set_hash_table (f);
3844 if (gcse_file)
3845 dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
3846 n_sets);
3847 if (n_sets > 0)
3848 {
3849 alloc_cprop_mem (n_basic_blocks, n_sets);
3850 compute_cprop_data ();
3851 changed = cprop ();
3852 free_cprop_mem ();
3853 }
3854 free_set_hash_table ();
3855
3856 if (gcse_file)
3857 {
3858 fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, %d const props, %d copy props\n",
3859 current_function_name, pass,
3860 bytes_used, const_prop_count, copy_prop_count);
3861 fprintf (gcse_file, "\n");
3862 }
3863
3864 return changed;
3865 }
3866 \f
3867 /* Compute PRE working variables. */
3868
3869 /* Local properties of expressions. */
3870 /* Nonzero for expressions that are transparent in the block. */
3871 static sbitmap *pre_transp;
3872 /* Nonzero for expressions that are computed (available) in the block. */
3873 static sbitmap *pre_comp;
3874 /* Nonzero for expressions that are locally anticipatable in the block. */
3875 static sbitmap *pre_antloc;
3876
3877 /* Global properties (computed from the expression local properties). */
3878 /* Nonzero for expressions that are available on block entry/exit. */
3879 static sbitmap *pre_avin;
3880 static sbitmap *pre_avout;
3881 /* Nonzero for expressions that are anticipatable on block entry/exit. */
3882 static sbitmap *pre_antin;
3883 static sbitmap *pre_antout;
3884 /* Nonzero for expressions that are partially available on block entry/exit. */
3885 static sbitmap *pre_pavin;
3886 static sbitmap *pre_pavout;
3887 /* Nonzero for expressions that are "placement possible" on block entry/exit. */
3888 static sbitmap *pre_ppin;
3889 static sbitmap *pre_ppout;
3890
3891 /* Used while performing PRE to denote which insns are redundant. */
3892 static sbitmap pre_redundant;
3893
3894 /* Allocate vars used for PRE analysis. */
3895
3896 static void
3897 alloc_pre_mem (n_blocks, n_exprs)
3898 int n_blocks, n_exprs;
3899 {
3900 pre_transp = sbitmap_vector_alloc (n_blocks, n_exprs);
3901 pre_comp = sbitmap_vector_alloc (n_blocks, n_exprs);
3902 pre_antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
3903
3904 pre_avin = sbitmap_vector_alloc (n_blocks, n_exprs);
3905 pre_avout = sbitmap_vector_alloc (n_blocks, n_exprs);
3906 pre_antin = sbitmap_vector_alloc (n_blocks, n_exprs);
3907 pre_antout = sbitmap_vector_alloc (n_blocks, n_exprs);
3908
3909 pre_pavin = sbitmap_vector_alloc (n_blocks, n_exprs);
3910 pre_pavout = sbitmap_vector_alloc (n_blocks, n_exprs);
3911 pre_ppin = sbitmap_vector_alloc (n_blocks, n_exprs);
3912 pre_ppout = sbitmap_vector_alloc (n_blocks, n_exprs);
3913 }
3914
3915 /* Free vars used for PRE analysis. */
3916
3917 static void
3918 free_pre_mem ()
3919 {
3920 free (pre_transp);
3921 free (pre_comp);
3922 free (pre_antloc);
3923
3924 free (pre_avin);
3925 free (pre_avout);
3926 free (pre_antin);
3927 free (pre_antout);
3928
3929 free (pre_pavin);
3930 free (pre_pavout);
3931 free (pre_ppin);
3932 free (pre_ppout);
3933 }
3934
3935 /* Dump PRE data. */
3936
3937 void
3938 dump_pre_data (file)
3939 FILE *file;
3940 {
3941 dump_sbitmap_vector (file, "PRE locally transparent expressions", "BB",
3942 pre_transp, n_basic_blocks);
3943 dump_sbitmap_vector (file, "PRE locally available expressions", "BB",
3944 pre_comp, n_basic_blocks);
3945 dump_sbitmap_vector (file, "PRE locally anticipatable expressions", "BB",
3946 pre_antloc, n_basic_blocks);
3947
3948 dump_sbitmap_vector (file, "PRE available incoming expressions", "BB",
3949 pre_avin, n_basic_blocks);
3950 dump_sbitmap_vector (file, "PRE available outgoing expressions", "BB",
3951 pre_avout, n_basic_blocks);
3952 dump_sbitmap_vector (file, "PRE anticipatable incoming expressions", "BB",
3953 pre_antin, n_basic_blocks);
3954 dump_sbitmap_vector (file, "PRE anticipatable outgoing expressions", "BB",
3955 pre_antout, n_basic_blocks);
3956
3957 dump_sbitmap_vector (file, "PRE partially available incoming expressions", "BB",
3958 pre_pavin, n_basic_blocks);
3959 dump_sbitmap_vector (file, "PRE partially available outgoing expressions", "BB",
3960 pre_pavout, n_basic_blocks);
3961 dump_sbitmap_vector (file, "PRE placement possible on incoming", "BB",
3962 pre_ppin, n_basic_blocks);
3963 dump_sbitmap_vector (file, "PRE placement possible on outgoing", "BB",
3964 pre_ppout, n_basic_blocks);
3965 }
3966
3967 /* Compute the local properties of each recorded expression.
3968 Local properties are those that are defined by the block, irrespective
3969 of other blocks.
3970
3971 An expression is transparent in a block if its operands are not modified
3972 in the block.
3973
3974 An expression is computed (locally available) in a block if it is computed
3975 at least once and the expression would contain the same value if the
3976 computation were moved to the end of the block.
3977
3978 An expression is locally anticipatable in a block if it is computed at
3979 least once and the expression would contain the same value if the
3980 computation were moved to the beginning of the block. */
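/* An illustrative example [hypothetical insns]: in a block containing

	insn 1: b = b + 1
	insn 2: x = b + c

   the expression (b + c) is not transparent (b is modified in the
   block), is computed (nothing after insn 2 changes its value, so it
   could be moved to the end), and is not locally anticipatable
   (moved to the beginning it would use the old value of b).  */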
3981
3982 static void
3983 compute_pre_local_properties ()
3984 {
3985 int i;
3986
3987 sbitmap_vector_ones (pre_transp, n_basic_blocks);
3988 sbitmap_vector_zero (pre_comp, n_basic_blocks);
3989 sbitmap_vector_zero (pre_antloc, n_basic_blocks);
3990
3991 for (i = 0; i < expr_hash_table_size; i++)
3992 {
3993 struct expr *expr;
3994
3995 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
3996 {
3997 struct occr *occr;
3998 int indx = expr->bitmap_index;
3999
4000 /* The expression is transparent in this block if it is not killed.
4001 We start by assuming all are transparent [none are killed], and then
4002 reset the bits for those that are. */
4003
4004 compute_transp (expr->expr, indx, pre_transp, 0);
4005
4006 /* The occurrences recorded in antic_occr are exactly those that
4007 we want to set to non-zero in ANTLOC. */
4008
4009 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4010 {
4011 int bb = BLOCK_NUM (occr->insn);
4012 SET_BIT (pre_antloc[bb], indx);
4013
4014 /* While we're scanning the table, this is a good place to
4015 initialize this. */
4016 occr->deleted_p = 0;
4017 }
4018
4019 /* The occurrences recorded in avail_occr are exactly those that
4020 we want to set to non-zero in COMP. */
4021
4022 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
4023 {
4024 int bb = BLOCK_NUM (occr->insn);
4025 SET_BIT (pre_comp[bb], indx);
4026
4027 /* While we're scanning the table, this is a good place to
4028 initialize this. */
4029 occr->copied_p = 0;
4030 }
4031
4032 /* While we're scanning the table, this is a good place to
4033 initialize this. */
4034 expr->reaching_reg = 0;
4035 }
4036 }
4037 }
4038
4039 /* Compute expression availability at entrance and exit of each block. */
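/* The equations iterated below, a standard forward dataflow problem:

	AVIN[b]  = intersection over predecessors p of AVOUT[p]
	AVOUT[b] = COMP[b] | (TRANSP[b] & AVIN[b])

   with AVIN of the first block forced to the empty set.  */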
4040
4041 static void
4042 compute_pre_avinout ()
4043 {
4044 int bb, changed, passes;
4045
4046 sbitmap_zero (pre_avin[0]);
4047 sbitmap_vector_ones (pre_avout, n_basic_blocks);
4048
4049 passes = 0;
4050 changed = 1;
4051 while (changed)
4052 {
4053 changed = 0;
4054 for (bb = 0; bb < n_basic_blocks; bb++)
4055 {
4056 if (bb != 0)
4057 sbitmap_intersect_of_predecessors (pre_avin[bb], pre_avout,
4058 bb, s_preds);
4059 changed |= sbitmap_a_or_b_and_c (pre_avout[bb], pre_comp[bb],
4060 pre_transp[bb], pre_avin[bb]);
4061 }
4062 passes++;
4063 }
4064
4065 if (gcse_file)
4066 fprintf (gcse_file, "avail expr computation: %d passes\n", passes);
4067 }
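/* A minimal standalone sketch of the same iteration [illustrative
   only, kept under `#if 0'; the CFG and bitsets below are made up].
   It solves the availability equations on a toy three block
   straight-line CFG using one machine word per block as the bitset.  */
#if 0
#include <stdio.h>

int
main ()
{
  /* Toy CFG: bb 0 -> bb 1 -> bb 2, so block b's only predecessor is
     block b-1 and the intersection over predecessors is trivial.  */
  unsigned int comp[3]   = { 0x1, 0x2, 0x0 };	/* locally computed */
  unsigned int transp[3] = { 0x3, 0x1, 0x3 };	/* not killed in block */
  unsigned int avin[3]   = { 0, 0, 0 };
  unsigned int avout[3];
  int bb, passes, changed;

  /* Start from all-ones AVOUT, as compute_pre_avinout does.  */
  for (bb = 0; bb < 3; bb++)
    avout[bb] = ~0U;

  passes = 0;
  changed = 1;
  while (changed)
    {
      changed = 0;
      for (bb = 0; bb < 3; bb++)
	{
	  unsigned int out;

	  if (bb != 0)
	    avin[bb] = avout[bb - 1];
	  out = comp[bb] | (transp[bb] & avin[bb]);
	  if (out != avout[bb])
	    {
	      avout[bb] = out;
	      changed = 1;
	    }
	}
      passes++;
    }

  /* Prints avin/avout per block; this example converges in two passes.  */
  for (bb = 0; bb < 3; bb++)
    printf ("bb %d: avin = 0x%x, avout = 0x%x\n", bb, avin[bb], avout[bb]);
  printf ("%d passes\n", passes);
  return 0;
}
#endif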
4068
4069 /* Compute expression anticipatability at entrance and exit of each block. */
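/* The backward analogue of the availability equations:

	ANTOUT[b] = intersection over successors s of ANTIN[s]
	ANTIN[b]  = ANTLOC[b] | (TRANSP[b] & ANTOUT[b])

   with ANTOUT of the last block forced to the empty set.  */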
4070
4071 static void
4072 compute_pre_antinout ()
4073 {
4074 int bb, changed, passes;
4075
4076 sbitmap_zero (pre_antout[n_basic_blocks - 1]);
4077 sbitmap_vector_ones (pre_antin, n_basic_blocks);
4078
4079 passes = 0;
4080 changed = 1;
4081 while (changed)
4082 {
4083 changed = 0;
4084 /* We scan the blocks in reverse order to speed up
4085 convergence. */
4086 for (bb = n_basic_blocks - 1; bb >= 0; bb--)
4087 {
4088 if (bb != n_basic_blocks - 1)
4089 sbitmap_intersect_of_successors (pre_antout[bb], pre_antin,
4090 bb, s_succs);
4091 changed |= sbitmap_a_or_b_and_c (pre_antin[bb], pre_antloc[bb],
4092 pre_transp[bb], pre_antout[bb]);
4093 }
4094 passes++;
4095 }
4096
4097 if (gcse_file)
4098 fprintf (gcse_file, "antic expr computation: %d passes\n", passes);
4099 }
4100
4101 /* Compute expression partial availability at entrance and exit of
4102 each block. */
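/* Same shape as the availability equations, but with union instead of
   intersection over predecessors, so an expression need only be
   computed along some path rather than along every path:

	PAVIN[b]  = union over predecessors p of PAVOUT[p]
	PAVOUT[b] = COMP[b] | (TRANSP[b] & PAVIN[b])

   with PAVIN of the first block forced to the empty set.  */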
4103
4104 static void
4105 compute_pre_pavinout ()
4106 {
4107 int bb, changed, passes;
4108
4109 sbitmap_zero (pre_pavin[0]);
4110 sbitmap_vector_zero (pre_pavout, n_basic_blocks);
4111
4112 passes = 0;
4113 changed = 1;
4114 while (changed)
4115 {
4116 changed = 0;
4117 for (bb = 0; bb < n_basic_blocks; bb++)
4118 {
4119 if (bb != 0)
4120 sbitmap_union_of_predecessors (pre_pavin[bb], pre_pavout,
4121 bb, s_preds);
4122 changed |= sbitmap_a_or_b_and_c (pre_pavout[bb], pre_comp[bb],
4123 pre_transp[bb], pre_pavin[bb]);
4124 }
4125 passes++;
4126 }
4127
4128 if (gcse_file)
4129 fprintf (gcse_file, "partially avail expr computation: %d passes\n", passes);
4130 }
4131
4132 /* Compute "placement possible" information on entrance and exit of
4133 each block.
4134
4135 From Fred Chow's Thesis:
4136 A computation `e' is PP at a point `p' if it is anticipated at `p' and
4137 all the anticipated e's can be rendered redundant by zero or more
4138 insertions at that point and some other points in the procedure, and
4139 these insertions satisfy the conditions that the insertions are always
4140 at points that `e' is anticipated and the first anticipated e's after the
4141 insertions are rendered redundant. */
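/* The equations as implemented below:

	PPIN[b]  = ANTIN[b] & PAVIN[b]
		   & (ANTLOC[b] | (TRANSP[b] & PPOUT[b]))
		   & intersection over predecessors p of (PPOUT[p] | AVOUT[p])
	PPOUT[b] = intersection over successors s of PPIN[s]

   with PPIN of the first block and PPOUT of the last block forced to
   the empty set.  When FOLLOW_BACK_EDGES is zero, a back-edge
   predecessor forces PPIN[b] to the empty set instead of contributing
   to the intersection.  */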
4142
4143 static void
4144 compute_pre_ppinout ()
4145 {
4146 int bb, i, changed, size, passes;
4147
4148 sbitmap_vector_ones (pre_ppin, n_basic_blocks);
4149 /* ??? Inefficient as we set pre_ppin[0] twice, but simple. */
4150 sbitmap_zero (pre_ppin[0]);
4151
4152 sbitmap_vector_ones (pre_ppout, n_basic_blocks);
4153 /* ??? Inefficient as we set pre_ppout[n_basic_blocks-1] twice, but simple. */
4154 sbitmap_zero (pre_ppout[n_basic_blocks - 1]);
4155
4156 size = pre_ppin[0]->size;
4157 passes = 0;
4158 changed = 1;
4159 while (changed)
4160 {
4161 changed = 0;
4162 for (bb = 1; bb < n_basic_blocks; bb++)
4163 {
4164 sbitmap_ptr antin = pre_antin[bb]->elms;
4165 sbitmap_ptr pavin = pre_pavin[bb]->elms;
4166 sbitmap_ptr antloc = pre_antloc[bb]->elms;
4167 sbitmap_ptr transp = pre_transp[bb]->elms;
4168 sbitmap_ptr ppout = pre_ppout[bb]->elms;
4169 sbitmap_ptr ppin = pre_ppin[bb]->elms;
4170
4171 for (i = 0; i < size; i++)
4172 {
4173 int_list_ptr pred;
4174 SBITMAP_ELT_TYPE tmp = *antin & *pavin & (*antloc | (*transp & *ppout));
4175 SBITMAP_ELT_TYPE pred_val = -1L;
4176
4177 for (pred = s_preds[bb]; pred != NULL; pred = pred->next)
4178 {
4179 int pred_bb = INT_LIST_VAL (pred);
4180 sbitmap_ptr ppout_j, avout_j;
4181
4182 if (pred_bb == ENTRY_BLOCK)
4183 continue;
4184
4185 /* If this is a back edge, propagate info along the back
4186 edge to allow for loop invariant code motion.
4187
4188 See FOLLOW_BACK_EDGES at the top of this file for a longer
4189 discussion about loop invariant code motion in pre. */
4190 if (! FOLLOW_BACK_EDGES
4191 && (INSN_CUID (BLOCK_HEAD (bb))
4192 < INSN_CUID (BLOCK_END (pred_bb))))
4193 {
4194 pred_val = 0;
4195 }
4196 else
4197 {
4198 ppout_j = pre_ppout[pred_bb]->elms + i;
4199 avout_j = pre_avout[pred_bb]->elms + i;
4200 pred_val &= *ppout_j | *avout_j;
4201 }
4202 }
4203 tmp &= pred_val;
4204 *ppin = tmp;
4205 antin++; pavin++; antloc++; transp++; ppout++; ppin++;
4206 }
4207 }
4208
4209 for (bb = 0; bb < n_basic_blocks - 1; bb++)
4210 {
4211 sbitmap_ptr ppout = pre_ppout[bb]->elms;
4212
4213 for (i = 0; i < size; i++)
4214 {
4215 int_list_ptr succ;
4216 SBITMAP_ELT_TYPE tmp = -1L;
4217
4218 for (succ = s_succs[bb]; succ != NULL; succ = succ->next)
4219 {
4220 int succ_bb = INT_LIST_VAL (succ);
4221 sbitmap_ptr ppin;
4222
4223 if (succ_bb == EXIT_BLOCK)
4224 continue;
4225
4226 ppin = pre_ppin[succ_bb]->elms + i;
4227 tmp &= *ppin;
4228 }
4229 if (*ppout != tmp)
4230 {
4231 changed = 1;
4232 *ppout++ = tmp;
4233 }
4234 else
4235 ppout++;
4236 }
4237 }
4238
4239 passes++;
4240 }
4241
4242 if (gcse_file)
4243 fprintf (gcse_file, "placement possible computation: %d passes\n", passes);
4244 }
4245
4246 /* Top level routine to do the dataflow analysis needed by PRE. */
4247
4248 static void
4249 compute_pre_data ()
4250 {
4251 compute_pre_local_properties ();
4252 compute_pre_avinout ();
4253 compute_pre_antinout ();
4254 compute_pre_pavinout ();
4255 compute_pre_ppinout ();
4256 if (gcse_file)
4257 fprintf (gcse_file, "\n");
4258 }
4259 \f
4260 /* PRE utilities */
4261
4262 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
4263
4264 VISITED is a pointer to a working buffer for tracking which BB's have
4265 been visited. It is NULL for the top-level call.
4266
4267 We treat reaching expressions that go through blocks containing the same
4268 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
4269 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4270 2 as not reaching. The intent is to improve the probability of finding
4271 only one reaching expression and to reduce register lifetimes by picking
4272 the closest such expression. */
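/* An illustrative picture of that rule [hypothetical block numbers]:

	[bb 2]  x = a + b	<- treated as NOT reaching bb 4
	   |
	[bb 3]  x = a + b	<- the occurrence considered to reach
	   |
	[bb 4]  use of a + b

   The occurrence in block 3 shadows the one in block 2 on the path
   2->3->4, so only the closest occurrence is taken to reach.  */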
4273
4274 static int
4275 pre_expr_reaches_here_p (occr, expr, bb, visited)
4276 struct occr *occr;
4277 struct expr *expr;
4278 int bb;
4279 char *visited;
4280 {
4281 int_list_ptr pred;
4282
4283 if (visited == NULL)
4284 {
4285 visited = (char *) alloca (n_basic_blocks);
4286 bzero (visited, n_basic_blocks);
4287 }
4288
4289 for (pred = s_preds[bb]; pred != NULL; pred = pred->next)
4290 {
4291 int pred_bb = INT_LIST_VAL (pred);
4292
4293 if (pred_bb == ENTRY_BLOCK
4294 /* Has this predecessor already been visited? */
4295 || visited[pred_bb])
4296 {
4297 /* Nothing to do. */
4298 }
4299 /* Does this predecessor generate this expression? */
4300 else if (TEST_BIT (pre_comp[pred_bb], expr->bitmap_index))
4301 {
4302 /* Is this the occurrence we're looking for?
4303 Note that there's only one generating occurrence per block
4304 so we just need to check the block number. */
4305 if (BLOCK_NUM (occr->insn) == pred_bb)
4306 return 1;
4307 visited[pred_bb] = 1;
4308 }
4309 /* Ignore this predecessor if it kills the expression. */
4310 else if (! TEST_BIT (pre_transp[pred_bb], expr->bitmap_index))
4311 visited[pred_bb] = 1;
4312 /* Neither gen nor kill. */
4313 else
4314 {
4315 visited[pred_bb] = 1;
4316 if (pre_expr_reaches_here_p (occr, expr, pred_bb, visited))
4317 return 1;
4318 }
4319 }
4320
4321 /* All paths have been checked. */
4322 return 0;
4323 }
4324 \f
4325 /* Add EXPR to the end of basic block BB. */
4326
4327 static void
4328 pre_insert_insn (expr, bb)
4329 struct expr *expr;
4330 int bb;
4331 {
4332 rtx insn = BLOCK_END (bb);
4333 rtx new_insn;
4334 rtx reg = expr->reaching_reg;
4335 int regno = REGNO (reg);
4336 rtx pat;
4337
4338 pat = gen_rtx_SET (VOIDmode, reg, copy_rtx (expr->expr));
4339
4340 /* If the last insn is a jump, insert EXPR in front [taking care to
4341 handle cc0, etc. properly]. */
4342
4343 if (GET_CODE (insn) == JUMP_INSN)
4344 {
4345 #ifdef HAVE_cc0
4346 rtx note;
4347 #endif
4348
4349 /* If this is a jump table, then we can't insert stuff here. Since
4350 we know the previous real insn must be the tablejump, we insert
4351 the new instruction just before the tablejump. */
4352 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4353 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4354 insn = prev_real_insn (insn);
4355
4356 #ifdef HAVE_cc0
4357 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4358 if cc0 isn't set. */
4359 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4360 if (note)
4361 insn = XEXP (note, 0);
4362 else
4363 {
4364 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4365 if (maybe_cc0_setter
4366 && GET_RTX_CLASS (GET_CODE (maybe_cc0_setter)) == 'i'
4367 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4368 insn = maybe_cc0_setter;
4369 }
4370 #endif
4371 /* FIXME: What if something in cc0/jump uses value set in new insn? */
4372 new_insn = emit_insn_before (pat, insn);
4373 add_label_notes (SET_SRC (pat), new_insn);
4374 if (BLOCK_HEAD (bb) == insn)
4375 BLOCK_HEAD (bb) = new_insn;
4376 /* Keep block number table up to date. */
4377 set_block_num (new_insn, bb);
4378 /* Keep register set table up to date. */
4379 record_one_set (regno, new_insn);
4380 }
4381 else
4382 {
4383 new_insn = emit_insn_after (pat, insn);
4384 add_label_notes (SET_SRC (pat), new_insn);
4385 BLOCK_END (bb) = new_insn;
4386 /* Keep block number table up to date. */
4387 set_block_num (new_insn, bb);
4388 /* Keep register set table up to date. */
4389 record_one_set (regno, new_insn);
4390 }
4391
4392 gcse_create_count++;
4393
4394 if (gcse_file)
4395 {
4396 fprintf (gcse_file, "PRE: end of bb %d, insn %d, copying expression %d to reg %d\n",
4397 bb, INSN_UID (new_insn), expr->bitmap_index, regno);
4398 }
4399 }
4400
4401 /* Insert partially redundant expressions at the ends of appropriate basic
4402 blocks, making them fully redundant. */
4403
4404 static void
4405 pre_insert (index_map)
4406 struct expr **index_map;
4407 {
4408 int bb, i, size;
4409
4410 /* Compute INSERT = PPOUT & (~AVOUT) & (~PPIN | ~TRANSP) for each
4411 expression. Where INSERT == TRUE, add the expression at the end of
4412 the basic block. */
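/* In words [an interpretation of the formula, not additional
   computation]: insert at the end of BB when the expression is
   placement possible leaving BB, is not already available there,
   and the insertion cannot instead be satisfied higher up, either
   because placement is not possible on entry to BB or because BB
   kills the expression.  */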
4413
4414 size = pre_ppout[0]->size;
4415 for (bb = 0; bb < n_basic_blocks; bb++)
4416 {
4417 int indx;
4418 sbitmap_ptr ppout = pre_ppout[bb]->elms;
4419 sbitmap_ptr avout = pre_avout[bb]->elms;
4420 sbitmap_ptr ppin = pre_ppin[bb]->elms;
4421 sbitmap_ptr transp = pre_transp[bb]->elms;
4422
4423 for (i = indx = 0;
4424 i < size;
4425 i++, indx += SBITMAP_ELT_BITS, ppout++, avout++, ppin++, transp++)
4426 {
4427 int j;
4428 SBITMAP_ELT_TYPE insert = *ppout & (~*avout) & (~*ppin | ~*transp);
4429
4430 for (j = indx; insert != 0 && j < n_exprs; j++, insert >>= 1)
4431 {
4432 if ((insert & 1) != 0
4433 /* If the basic block isn't reachable, PPOUT will be TRUE.
4434 However, we don't want to insert a copy here because the
4435 expression may not really be redundant. So only insert
4436 an insn if the expression was deleted. */
4437 && index_map[j]->reaching_reg != NULL)
4438 pre_insert_insn (index_map[j], bb);
4439 }
4440 }
4441 }
4442 }
4443
4444 /* Copy the result of INSN into EXPR's reaching register,
4445 emitting the copy immediately after INSN. */
4446
4447 static void
4448 pre_insert_copy_insn (expr, insn)
4449 struct expr *expr;
4450 rtx insn;
4451 {
4452 rtx reg = expr->reaching_reg;
4453 int regno = REGNO (reg);
4454 int indx = expr->bitmap_index;
4455 rtx set = single_set (insn);
4456 rtx new_insn;
4457
4458 if (!set)
4459 abort ();
4460 new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4461 insn);
4462 /* Keep block number table up to date. */
4463 set_block_num (new_insn, BLOCK_NUM (insn));
4464 /* Keep register set table up to date. */
4465 record_one_set (regno, new_insn);
4466
4467 gcse_create_count++;
4468
4469 if (gcse_file)
4470 {
4471 fprintf (gcse_file, "PRE: bb %d, insn %d, copying expression %d in insn %d to reg %d\n",
4472 BLOCK_NUM (insn), INSN_UID (new_insn), indx, INSN_UID (insn), regno);
4473 }
4474 }
4475
4476 /* Copy available expressions that reach the redundant expression
4477 to `reaching_reg'. */
4478
4479 static void
4480 pre_insert_copies ()
4481 {
4482 int i;
4483
4484 /* For each available expression in the table, copy the result to
4485 `reaching_reg' if the expression reaches a deleted one.
4486
4487 ??? The current algorithm is rather brute force.
4488 Need to do some profiling. */
4489
4490 for (i = 0; i < expr_hash_table_size; i++)
4491 {
4492 struct expr *expr;
4493
4494 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4495 {
4496 struct occr *occr;
4497
4498 /* If the basic block isn't reachable, PPOUT will be TRUE.
4499 However, we don't want to insert a copy here because the
4500 expression may not really be redundant. So only insert
4501 an insn if the expression was deleted.
4502 This test also avoids further processing if the expression
4503 wasn't deleted anywhere. */
4504 if (expr->reaching_reg == NULL)
4505 continue;
4506
4507 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4508 {
4509 struct occr *avail;
4510
4511 if (! occr->deleted_p)
4512 continue;
4513
4514 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4515 {
4516 rtx insn = avail->insn;
4517
4518 /* No need to handle this one if handled already. */
4519 if (avail->copied_p)
4520 continue;
4521 /* Don't handle this one if it's a redundant one. */
4522 if (TEST_BIT (pre_redundant, INSN_CUID (insn)))
4523 continue;
4524 /* Or if the expression doesn't reach the deleted one. */
4525 if (! pre_expr_reaches_here_p (avail, expr,
4526 BLOCK_NUM (occr->insn),
4527 NULL))
4528 continue;
4529
4530 /* Copy the result of avail to reaching_reg. */
4531 pre_insert_copy_insn (expr, insn);
4532 avail->copied_p = 1;
4533 }
4534 }
4535 }
4536 }
4537 }
4538
4539 /* Delete redundant computations.
4540 These are ones that satisfy ANTLOC & PPIN.
4541 Deletion is done by changing the insn to copy the `reaching_reg' of
4542 the expression into the result of the SET. It is left to later passes
4543 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4544
4545 Returns non-zero if a change is made. */
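/* An illustrative before/after [hypothetical register numbers]:
   a redundant

	(set (reg 70) (plus (reg 65) (reg 66)))

   becomes

	(set (reg 70) (reg 90))

   where (reg 90) is the expression's `reaching_reg'.  */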
4546
4547 static int
4548 pre_delete ()
4549 {
4550 int i, changed;
4551
4552 changed = 0;
4553 for (i = 0; i < expr_hash_table_size; i++)
4554 {
4555 struct expr *expr;
4556
4557 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4558 {
4559 struct occr *occr;
4560 int indx = expr->bitmap_index;
4561
4562 /* We only need to search antic_occr since we require
4563 ANTLOC != 0. */
4564
4565 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4566 {
4567 rtx insn = occr->insn;
4568 rtx set;
4569 int bb = BLOCK_NUM (insn);
4570 sbitmap ppin = pre_ppin[bb];
4571
4572 if (TEST_BIT (ppin, indx))
4573 {
4574 set = single_set (insn);
4575 if (! set)
4576 abort ();
4577
4578 /* Create a pseudo-reg to store the result of reaching
4579 expressions into. Get the mode for the new pseudo
4580 from the mode of the original destination pseudo. */
4581 if (expr->reaching_reg == NULL)
4582 expr->reaching_reg
4583 = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4584
4585 /* In theory this should never fail since we're creating
4586 a reg->reg copy.
4587
4588 However, on the x86 some of the movXX patterns actually
4589 contain clobbers of scratch regs. This may cause the
4590 insn created by validate_change to not match any pattern
4591 and thus cause validate_change to fail. */
4592 if (validate_change (insn, &SET_SRC (set),
4593 expr->reaching_reg, 0))
4594 {
4595 occr->deleted_p = 1;
4596 SET_BIT (pre_redundant, INSN_CUID (insn));
4597 changed = 1;
4598 gcse_subst_count++;
4599 }
4600
4601 if (gcse_file)
4602 {
4603 fprintf (gcse_file, "PRE: redundant insn %d (expression %d) in bb %d, reaching reg is %d\n",
4604 INSN_UID (insn), indx, bb, REGNO (expr->reaching_reg));
4605 }
4606 }
4607 }
4608 }
4609 }
4610
4611 return changed;
4612 }
4613
4614 /* Perform GCSE optimizations using PRE.
4615 This is called by one_pre_gcse_pass after all the dataflow analysis
4616 has been done.
4617
4618 This is based on the original Morel-Renvoise paper and Fred Chow's thesis.
4619
4620 The M-R paper uses "TRANSP" to describe an expression as being transparent
4621 in a block, whereas Chow's thesis uses "ALTERED". We use TRANSP.
4622
4623 ??? A new pseudo reg is created to hold the reaching expression.
4624 The nice thing about the classical approach is that it would try to
4625 use an existing reg. If the register can't be adequately optimized
4626 [i.e. we introduce reload problems], one could add a pass here to
4627 propagate the new register through the block.
4628
4629 ??? We don't handle single sets in PARALLELs because we're [currently]
4630 not able to copy the rest of the parallel when we insert copies to create
4631 full redundancies from partial redundancies. However, there's no reason
4632 why we can't handle PARALLELs in the cases where there are no partial
4633 redundancies. */
4634
4635 static int
4636 pre_gcse ()
4637 {
4638 int i;
4639 int changed;
4640 struct expr **index_map;
4641
4642 /* Compute a mapping from expression number (`bitmap_index') to
4643 hash table entry. */
4644
4645 index_map = (struct expr **) alloca (n_exprs * sizeof (struct expr *));
4646 bzero ((char *) index_map, n_exprs * sizeof (struct expr *));
4647 for (i = 0; i < expr_hash_table_size; i++)
4648 {
4649 struct expr *expr;
4650
4651 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4652 index_map[expr->bitmap_index] = expr;
4653 }
4654
4655 /* Reset bitmap used to track which insns are redundant. */
4656 pre_redundant = sbitmap_alloc (max_cuid);
4657 sbitmap_zero (pre_redundant);
4658
4659 /* Delete the redundant insns first so that
4660 - we know what register to use for the new insns and for the other
4661 ones with reaching expressions
4662 - we know which insns are redundant when we go to create copies */
4663 changed = pre_delete ();
4664
4665 /* Insert insns in places that make partially redundant expressions
4666 fully redundant. */
4667 pre_insert (index_map);
4668
4669 /* In other places with reaching expressions, copy the expression to the
4670 specially allocated pseudo-reg that reaches the redundant expression. */
4671 pre_insert_copies ();
4672
4673 free (pre_redundant);
4674
4675 return changed;
4676 }
4677
4678 /* Top level routine to perform one PRE GCSE pass.
4679
4680 Return non-zero if a change was made. */
4681
4682 static int
4683 one_pre_gcse_pass (f, pass)
4684 rtx f;
4685 int pass;
4686 {
4687 int changed = 0;
4688
4689 gcse_subst_count = 0;
4690 gcse_create_count = 0;
4691
4692 alloc_expr_hash_table (max_cuid);
4693 compute_expr_hash_table (f);
4694 if (gcse_file)
4695 dump_hash_table (gcse_file, "Expression", expr_hash_table,
4696 expr_hash_table_size, n_exprs);
4697 if (n_exprs > 0)
4698 {
4699 alloc_pre_mem (n_basic_blocks, n_exprs);
4700 compute_pre_data ();
4701 changed |= pre_gcse ();
4702 free_pre_mem ();
4703 }
4704 free_expr_hash_table ();
4705
4706 if (gcse_file)
4707 {
4708 fprintf (gcse_file, "\n");
4709 fprintf (gcse_file, "PRE GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
4710 current_function_name, pass,
4711 bytes_used, gcse_subst_count, gcse_create_count);
4712 }
4713
4714 return changed;
4715 }
4716 \f
4717 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4718 We have to add REG_LABEL notes, because the following loop optimization
4719 pass requires them. */
4720
4721 /* ??? This is very similar to the loop.c add_label_notes function. We
4722 could probably share code here. */
4723
4724 /* ??? If there was a jump optimization pass after gcse and before loop,
4725 then we would not need to do this here, because jump would add the
4726 necessary REG_LABEL notes. */
4727
4728 static void
4729 add_label_notes (x, insn)
4730 rtx x;
4731 rtx insn;
4732 {
4733 enum rtx_code code = GET_CODE (x);
4734 int i, j;
4735 char *fmt;
4736
4737 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4738 {
4739 /* This code used to ignore labels that referred to dispatch tables to
4740 avoid flow generating (slightly) worse code.
4741
4742 We no longer ignore such label references (see LABEL_REF handling in
4743 mark_jump_label for additional information). */
4744 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
4745 REG_NOTES (insn));
4746 return;
4747 }
4748
4749 fmt = GET_RTX_FORMAT (code);
4750 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4751 {
4752 if (fmt[i] == 'e')
4753 add_label_notes (XEXP (x, i), insn);
4754 else if (fmt[i] == 'E')
4755 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4756 add_label_notes (XVECEXP (x, i, j), insn);
4757 }
4758 }