/* Global common subexpression elimination/Partial redundancy elimination
   and global constant/copy propagation for GNU compiler.
   Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

/* TODO
   - reordering of memory allocation and freeing to be more space efficient
   - do rough calc of how many regs are needed in each block, and a rough
     calc of how many regs are available in each class and use that to
     throttle back the code in cases where RTX_COST is minimal.
   - dead store elimination
   - a store to the same address as a load does not kill the load if the
     source of the store is also the destination of the load.  Handling this
     allows more load motion, particularly out of loops.
   - ability to realloc sbitmap vectors would allow one initial computation
     of reg_set_in_block with only subsequent additions, rather than
     recomputing it for each pass

*/

/* References searched while implementing this.

   Compilers Principles, Techniques and Tools
   Aho, Sethi, Ullman
   Addison-Wesley, 1988

   Global Optimization by Suppression of Partial Redundancies
   E. Morel, C. Renvoise
   communications of the acm, Vol. 22, Num. 2, Feb. 1979

   A Portable Machine-Independent Global Optimizer - Design and Measurements
   Frederick Chow
   Stanford Ph.D. thesis, Dec. 1983

   A Fast Algorithm for Code Movement Optimization
   D.M. Dhamdhere
   SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988

   A Solution to a Problem with Morel and Renvoise's
   Global Optimization by Suppression of Partial Redundancies
   K-H Drechsler, M.P. Stadel
   ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988

   Practical Adaptation of the Global Optimization
   Algorithm of Morel and Renvoise
   D.M. Dhamdhere
   ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991

   Efficiently Computing Static Single Assignment Form and the Control
   Dependence Graph
   R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
   ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991

   Lazy Code Motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
   Time for Reducible Flow Control
   Thomas Ball
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   An Efficient Representation for Sparse Sets
   Preston Briggs, Linda Torczon
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
   K-H Drechsler, M.P. Stadel
   ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993

   Partial Dead Code Elimination
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Effective Partial Redundancy Elimination
   P. Briggs, K.D. Cooper
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   The Program Structure Tree: Computing Control Regions in Linear Time
   R. Johnson, D. Pearson, K. Pingali
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Optimal Code Motion: Theory and Practice
   J. Knoop, O. Ruthing, B. Steffen
   ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994

   The power of assignment motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Global code motion / global value numbering
   C. Click
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Value Driven Redundancy Elimination
   L.T. Simpson
   Rice University Ph.D. thesis, Apr. 1996

   Value Numbering
   L.T. Simpson
   Massively Scalar Compiler Project, Rice University, Sep. 1996

   High Performance Compilers for Parallel Computing
   Michael Wolfe
   Addison-Wesley, 1996

   Advanced Compiler Design and Implementation
   Steven Muchnick
   Morgan Kaufmann, 1997

   Building an Optimizing Compiler
   Robert Morgan
   Digital Press, 1998

   People wishing to speed up the code here should read:
     Elimination Algorithms for Data Flow Analysis
     B.G. Ryder, M.C. Paull
     ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986

     How to Analyze Large Programs Efficiently and Informatively
     D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
     ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   People wishing to do something different can find various possibilities
   in the above papers and elsewhere.
*/

#include "config.h"
#include "system.h"
#include "toplev.h"

#include "rtl.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "flags.h"
#include "real.h"
#include "insn-config.h"
#include "recog.h"
#include "basic-block.h"
#include "output.h"
#include "function.h"
#include "expr.h"

#include "obstack.h"
#define obstack_chunk_alloc gmalloc
#define obstack_chunk_free free

/* Maximum number of passes to perform.  */
#define MAX_PASSES 1

/* Propagate flow information through back edges and thus enable PRE's
   moving loop invariant calculations out of loops.

   Originally this tended to create worse overall code, but several
   improvements during the development of PRE seem to have made following
   back edges generally a win.

   Note much of the loop invariant code motion done here would normally
   be done by loop.c, which has more heuristics for when to move invariants
   out of loops.  At some point we might need to move some of those
   heuristics into gcse.c.  */
#define FOLLOW_BACK_EDGES 1

/* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
   are a superset of those done by GCSE.

   We perform the following steps:

   1) Compute basic block information.

   2) Compute table of places where registers are set.

   3) Perform copy/constant propagation.

   4) Perform global cse.

   5) Perform another pass of copy/constant propagation.

   Two passes of copy/constant propagation are done because the first one
   enables more GCSE and the second one helps to clean up the copies that
   GCSE creates.  This is needed more for PRE than for Classic because Classic
   GCSE will try to use an existing register containing the common
   subexpression rather than create a new one.  This is harder to do for PRE
   because of the code motion (which Classic GCSE doesn't do).

   Expressions we are interested in GCSE-ing are of the form
   (set (pseudo-reg) (expression)).
   Function want_to_gcse_p says what these are.

   PRE handles moving invariant expressions out of loops (by treating them as
   partially redundant).

   Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
   assignment) based GVN (global value numbering).  L. T. Simpson's paper
   (Rice University) on value numbering is a useful reference for this.

   **********************

   We used to support multiple passes but there are diminishing returns in
   doing so.  The first pass usually makes 90% of the changes that are doable.
   A second pass can make a few more changes made possible by the first pass.
   Experiments show any further passes don't make enough changes to justify
   the expense.

   A study of spec92 using an unlimited number of passes:
   [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
   [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
   [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1

   It was found doing copy propagation between each pass enables further
   substitutions.

   PRE is quite expensive in complicated functions because the DFA can take
   a while to converge.  Hence we only perform one pass.  The macro MAX_PASSES
   can be modified if one wants to experiment.

   **********************

   The steps for PRE are:

   1) Build the hash table of expressions we wish to GCSE (expr_hash_table).

   2) Perform the data flow analysis for PRE.

   3) Delete the redundant instructions.

   4) Insert the required copies [if any] that make the partially
      redundant instructions fully redundant.

   5) For other reaching expressions, insert an instruction to copy the value
      to a newly created pseudo that will reach the redundant instruction.

   The deletion is done first so that when we do insertions we
   know which pseudo reg to use.

   Various papers have argued that PRE DFA is expensive (O(n^2)) and others
   argue it is not.  The number of iterations for the algorithm to converge
   is typically 2-4 so I don't view it as that expensive (relatively speaking).

   PRE GCSE depends heavily on the second CSE pass to clean up the copies
   we create.  To make an expression reach the place where it's redundant,
   the result of the expression is copied to a new register, and the redundant
   expression is deleted by replacing it with this new register.  Classic GCSE
   doesn't have this problem as much as it computes the reaching defs of
   each register in each block and thus can try to use an existing register.

   **********************

   A fair bit of simplicity is created by creating small functions for simple
   tasks, even when the function is only called in one place.  This may
   measurably slow things down [or may not] by creating more function call
   overhead than is necessary.  The source is laid out so that it's trivial
   to make the affected functions inline so that one can measure what speed
   up, if any, can be achieved, and maybe later when things settle things can
   be rearranged.

   Help stamp out big monolithic functions!  */
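
/* A hedged editorial illustration of the PRE transformation described
   above (not from the original sources; register numbers are
   hypothetical).  Given

	(set (reg 101) (plus (reg 1) (reg 2)))	;; block A
	...
	(set (reg 102) (plus (reg 1) (reg 2)))	;; block B, redundant along A->B

   PRE first deletes the redundant computation in block B by rewriting it
   as a copy from a newly created pseudo, then copies the value into that
   pseudo at each reaching occurrence:

	(set (reg 103) (plus (reg 1) (reg 2)))	;; block A; reg 103 is reaching_reg
	(set (reg 101) (reg 103))
	...
	(set (reg 102) (reg 103))		;; block B, now a plain copy

   The follow-up pass of copy/constant propagation then cleans up these
   copies.  */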
\f
/* GCSE global vars.  */

/* -dG dump file.  */
static FILE *gcse_file;

/* Note whether or not we should run jump optimization after gcse.  We
   want to do this for two cases.

    * If we changed any jumps via cprop.

    * If we added any labels via edge splitting.  */

static int run_jump_opt_after_gcse;

/* Bitmaps are normally not included in debugging dumps.
   However it's useful to be able to print them from GDB.
   We could create special functions for this, but it's simpler to
   just allow passing stderr to the dump_foo fns.  Since stderr can
   be a macro, we store a copy here.  */
static FILE *debug_stderr;

/* An obstack for our working variables.  */
static struct obstack gcse_obstack;

/* Non-zero for each mode that supports (set (reg) (reg)).
   This is trivially true for integer and floating point values.
   It may or may not be true for condition codes.  */
static char can_copy_p[(int) NUM_MACHINE_MODES];

/* Non-zero if can_copy_p has been initialized.  */
static int can_copy_init_p;

struct reg_use {
  rtx reg_rtx;
};

/* Hash table of expressions.  */

struct expr
{
  /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
  rtx expr;
  /* Index in the available expression bitmaps.  */
  int bitmap_index;
  /* Next entry with the same hash.  */
  struct expr *next_same_hash;
  /* List of anticipatable occurrences in basic blocks in the function.
     An "anticipatable occurrence" is one that is the first occurrence in the
     basic block, the operands are not modified in the basic block prior
     to the occurrence and the output is not used between the start of
     the block and the occurrence.  */
  struct occr *antic_occr;
  /* List of available occurrences in basic blocks in the function.
     An "available occurrence" is one that is the last occurrence in the
     basic block and the operands are not modified by following statements in
     the basic block [including this insn].  */
  struct occr *avail_occr;
  /* Non-null if the computation is PRE redundant.
     The value is the newly created pseudo-reg to record a copy of the
     expression in all the places that reach the redundant copy.  */
  rtx reaching_reg;
};

/* Occurrence of an expression.
   There is one per basic block.  If a pattern appears more than once the
   last appearance is used [or first for anticipatable expressions].  */

struct occr
{
  /* Next occurrence of this expression.  */
  struct occr *next;
  /* The insn that computes the expression.  */
  rtx insn;
  /* Non-zero if this [anticipatable] occurrence has been deleted.  */
  char deleted_p;
  /* Non-zero if this [available] occurrence has been copied to
     reaching_reg.  */
  /* ??? This is mutually exclusive with deleted_p, so they could share
     the same byte.  */
  char copied_p;
};

/* Expression and copy propagation hash tables.
   Each hash table is an array of buckets.
   ??? It is known that if it were an array of entries, structure elements
   `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
   not clear whether in the final analysis a sufficient amount of memory would
   be saved as the size of the available expression bitmaps would be larger
   [one could build a mapping table without holes afterwards though].
   Someday I'll perform the computation and figure it out.  */

/* Total size of the expression hash table, in elements.  */
static int expr_hash_table_size;
/* The table itself.
   This is an array of `expr_hash_table_size' elements.  */
static struct expr **expr_hash_table;

/* Total size of the copy propagation hash table, in elements.  */
static int set_hash_table_size;
/* The table itself.
   This is an array of `set_hash_table_size' elements.  */
static struct expr **set_hash_table;

/* Mapping of uids to cuids.
   Only real insns get cuids.  */
static int *uid_cuid;

/* Highest UID in UID_CUID.  */
static int max_uid;

/* Get the cuid of an insn.  */
#define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])

/* Number of cuids.  */
static int max_cuid;

/* Mapping of cuids to insns.  */
static rtx *cuid_insn;

/* Get insn from cuid.  */
#define CUID_INSN(CUID) (cuid_insn[CUID])
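
/* An editorial example of the mapping, with hypothetical UIDs: if a block
   contains real insns with UIDs 12 and 17 separated by a note with UID 13,
   alloc_gcse_mem assigns uid_cuid[12] == 0 and uid_cuid[17] == 1, and the
   note shares the cuid of the next real insn (uid_cuid[13] == 1).  Only
   real insns appear in cuid_insn, so CUID_INSN (0) is the insn with UID 12
   and CUID_INSN (1) is the insn with UID 17.  */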

/* Maximum register number in function prior to doing gcse + 1.
   Registers created during this pass have regno >= max_gcse_regno.
   This is named with "gcse" to not collide with global of same name.  */
static int max_gcse_regno;

/* Maximum number of cse-able expressions found.  */
static int n_exprs;
/* Maximum number of assignments for copy propagation found.  */
static int n_sets;

/* Table of registers that are modified.
   For each register, each element is a list of places where the pseudo-reg
   is set.

   For simplicity, GCSE is done on sets of pseudo-regs only.  PRE GCSE only
   requires knowledge of which blocks kill which regs [and thus could use
   a bitmap instead of the lists `reg_set_table' uses].

   `reg_set_table' could be turned into an array of bitmaps
   (num-bbs x num-regs)
   [however perhaps it may be useful to keep the data as is].
   One advantage of recording things this way is that `reg_set_table' is
   fairly sparse with respect to pseudo regs but for hard regs could be
   fairly dense [relatively speaking].
   And recording sets of pseudo-regs in lists speeds
   up functions like compute_transp since in the case of pseudo-regs we only
   need to iterate over the number of times a pseudo-reg is set, not over the
   number of basic blocks [clearly there is a bit of a slow down in the cases
   where a pseudo is set more than once in a block, however it is believed
   that the net effect is to speed things up].  This isn't done for hard-regs
   because recording call-clobbered hard-regs in `reg_set_table' at each
   function call can consume a fair bit of memory, and iterating over
   hard-regs stored this way in compute_transp will be more expensive.  */

typedef struct reg_set {
  /* The next setting of this register.  */
  struct reg_set *next;
  /* The insn where it was set.  */
  rtx insn;
} reg_set;
static reg_set **reg_set_table;
/* Size of `reg_set_table'.
   The table starts out at max_gcse_regno + slop, and is enlarged as
   necessary.  */
static int reg_set_table_size;
/* Amount to grow `reg_set_table' by when it's full.  */
#define REG_SET_TABLE_SLOP 100
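
/* A minimal sketch (editorial; not called anywhere) of how the recorded
   sets of pseudo-reg REGNO are walked.  compute_transp iterates this way,
   so the cost is proportional to the number of times the register is set
   rather than to the number of basic blocks:

	reg_set *r;

	for (r = reg_set_table[regno]; r != NULL; r = r->next)
	  ... BLOCK_NUM (r->insn) is a block that sets REGNO ...  */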

/* Bitmap containing one bit for each register in the program.
   Used when performing GCSE to track which registers have been set since
   the start of the basic block.  */
static sbitmap reg_set_bitmap;

/* For each block, a bitmap of registers set in the block.
   This is used by expr_killed_p and compute_transp.
   It is computed during hash table computation and not by compute_sets
   as it includes registers added since the last pass (or between cprop and
   gcse) and it's currently not easy to realloc sbitmap vectors.  */
static sbitmap *reg_set_in_block;

/* For each block, non-zero if memory is set in that block.
   This is computed during hash table computation and is used by
   expr_killed_p and compute_transp.
   ??? Handling of memory is very simple, we don't make any attempt
   to optimize things (later).
   ??? This can be computed by compute_sets since the information
   doesn't change.  */
static char *mem_set_in_block;

/* Various variables for statistics gathering.  */

/* Memory used in a pass.
   This isn't intended to be absolutely precise.  Its intent is only
   to keep an eye on memory usage.  */
static int bytes_used;
/* GCSE substitutions made.  */
static int gcse_subst_count;
/* Number of copy instructions created.  */
static int gcse_create_count;
/* Number of constants propagated.  */
static int const_prop_count;
/* Number of copies propagated.  */
static int copy_prop_count;
\f
/* These variables are used by classic GCSE.
   Normally they'd be defined a bit later, but `rd_gen' needs to
   be declared sooner.  */

/* A bitmap of all ones for implementing the algorithm for available
   expressions and reaching definitions.  */
/* ??? Available expression bitmaps have a different size than reaching
   definition bitmaps.  This should be the larger of the two, however, it
   is not currently used for reaching definitions.  */
static sbitmap u_bitmap;

/* Each block has a bitmap of each type.
   The length of each block's bitmap is:

       max_cuid  - for reaching definitions
       n_exprs   - for available expressions

   Thus we view the bitmaps as 2 dimensional arrays, i.e.
       rd_kill[block_num][cuid_num]
       ae_kill[block_num][expr_num]  */

/* For reaching defs */
static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;

/* for available exprs */
static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
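
/* For example, whether the expression with index INDX is killed in block
   BB is tested with TEST_BIT (ae_kill[bb], indx) and recorded with
   SET_BIT; each element of the sbitmap vector is one row of the conceptual
   two dimensional array described above.  */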

/* Objects of this type are passed around by the null-pointer check
   removal routines.  */
struct null_pointer_info {
  /* The basic block being processed.  */
  int current_block;
  /* The first register to be handled in this pass.  */
  int min_reg;
  /* One greater than the last register to be handled in this pass.  */
  int max_reg;
  sbitmap *nonnull_local;
  sbitmap *nonnull_killed;
};
\f
static void compute_can_copy PARAMS ((void));

static char *gmalloc PARAMS ((unsigned int));
static char *grealloc PARAMS ((char *, unsigned int));
static char *gcse_alloc PARAMS ((unsigned long));
static void alloc_gcse_mem PARAMS ((rtx));
static void free_gcse_mem PARAMS ((void));
static void alloc_reg_set_mem PARAMS ((int));
static void free_reg_set_mem PARAMS ((void));
static int get_bitmap_width PARAMS ((int, int, int));
static void record_one_set PARAMS ((int, rtx));
static void record_set_info PARAMS ((rtx, rtx, void *));
static void compute_sets PARAMS ((rtx));

static void hash_scan_insn PARAMS ((rtx, int, int));
static void hash_scan_set PARAMS ((rtx, rtx, int));
static void hash_scan_clobber PARAMS ((rtx, rtx));
static void hash_scan_call PARAMS ((rtx, rtx));
static int want_to_gcse_p PARAMS ((rtx));
static int oprs_unchanged_p PARAMS ((rtx, rtx, int));
static int oprs_anticipatable_p PARAMS ((rtx, rtx));
static int oprs_available_p PARAMS ((rtx, rtx));
static void insert_expr_in_table PARAMS ((rtx, enum machine_mode,
					  rtx, int, int));
static void insert_set_in_table PARAMS ((rtx, rtx));
static unsigned int hash_expr PARAMS ((rtx, enum machine_mode,
				       int *, int));
static unsigned int hash_expr_1 PARAMS ((rtx, enum machine_mode, int *));
static unsigned int hash_set PARAMS ((int, int));
static int expr_equiv_p PARAMS ((rtx, rtx));
static void record_last_reg_set_info PARAMS ((rtx, int));
static void record_last_mem_set_info PARAMS ((rtx));
static void record_last_set_info PARAMS ((rtx, rtx, void *));
static void compute_hash_table PARAMS ((int));
static void alloc_set_hash_table PARAMS ((int));
static void free_set_hash_table PARAMS ((void));
static void compute_set_hash_table PARAMS ((void));
static void alloc_expr_hash_table PARAMS ((int));
static void free_expr_hash_table PARAMS ((void));
static void compute_expr_hash_table PARAMS ((void));
static void dump_hash_table PARAMS ((FILE *, const char *,
				     struct expr **, int, int));
static struct expr *lookup_expr PARAMS ((rtx));
static struct expr *lookup_set PARAMS ((int, rtx));
static struct expr *next_set PARAMS ((int, struct expr *));
static void reset_opr_set_tables PARAMS ((void));
static int oprs_not_set_p PARAMS ((rtx, rtx));
static void mark_call PARAMS ((rtx));
static void mark_set PARAMS ((rtx, rtx));
static void mark_clobber PARAMS ((rtx, rtx));
static void mark_oprs_set PARAMS ((rtx));

static void alloc_cprop_mem PARAMS ((int, int));
static void free_cprop_mem PARAMS ((void));
static void compute_transp PARAMS ((rtx, int, sbitmap *, int));
static void compute_transpout PARAMS ((void));
static void compute_local_properties PARAMS ((sbitmap *, sbitmap *,
					      sbitmap *, int));
static void compute_cprop_data PARAMS ((void));
static void find_used_regs PARAMS ((rtx));
static int try_replace_reg PARAMS ((rtx, rtx, rtx));
static struct expr *find_avail_set PARAMS ((int, rtx));
static int cprop_jump PARAMS ((rtx, rtx, struct reg_use *, rtx));
#ifdef HAVE_cc0
static int cprop_cc0_jump PARAMS ((rtx, struct reg_use *, rtx));
#endif
static int cprop_insn PARAMS ((rtx, int));
static int cprop PARAMS ((int));
static int one_cprop_pass PARAMS ((int, int));

static void alloc_pre_mem PARAMS ((int, int));
static void free_pre_mem PARAMS ((void));
static void compute_pre_data PARAMS ((void));
static int pre_expr_reaches_here_p PARAMS ((int, struct expr *, int));
static void insert_insn_end_bb PARAMS ((struct expr *, int, int));
static void pre_insert_copy_insn PARAMS ((struct expr *, rtx));
static void pre_insert_copies PARAMS ((void));
static int pre_delete PARAMS ((void));
static int pre_gcse PARAMS ((void));
static int one_pre_gcse_pass PARAMS ((int));

static void add_label_notes PARAMS ((rtx, rtx));

static void alloc_code_hoist_mem PARAMS ((int, int));
static void free_code_hoist_mem PARAMS ((void));
static void compute_code_hoist_vbeinout PARAMS ((void));
static void compute_code_hoist_data PARAMS ((void));
static int hoist_expr_reaches_here_p PARAMS ((int, int, int, char *));
static void hoist_code PARAMS ((void));
static int one_code_hoisting_pass PARAMS ((void));

static void alloc_rd_mem PARAMS ((int, int));
static void free_rd_mem PARAMS ((void));
static void handle_rd_kill_set PARAMS ((rtx, int, int));
static void compute_kill_rd PARAMS ((void));
static void compute_rd PARAMS ((void));
static void alloc_avail_expr_mem PARAMS ((int, int));
static void free_avail_expr_mem PARAMS ((void));
static void compute_ae_gen PARAMS ((void));
static int expr_killed_p PARAMS ((rtx, int));
static void compute_ae_kill PARAMS ((sbitmap *, sbitmap *));
static int expr_reaches_here_p PARAMS ((struct occr *, struct expr *,
					int, int));
static rtx computing_insn PARAMS ((struct expr *, rtx));
static int def_reaches_here_p PARAMS ((rtx, rtx));
static int can_disregard_other_sets PARAMS ((struct reg_set **, rtx, int));
static int handle_avail_expr PARAMS ((rtx, struct expr *));
static int classic_gcse PARAMS ((void));
static int one_classic_gcse_pass PARAMS ((int));
static void invalidate_nonnull_info PARAMS ((rtx, rtx, void *));
static void delete_null_pointer_checks_1 PARAMS ((int *, sbitmap *, sbitmap *,
						  struct null_pointer_info *));
static rtx process_insert_insn PARAMS ((struct expr *));
static int pre_edge_insert PARAMS ((struct edge_list *, struct expr **));
static int expr_reaches_here_p_work PARAMS ((struct occr *, struct expr *,
					     int, int, char *));
static int pre_expr_reaches_here_p_work PARAMS ((int, struct expr *,
						 int, char *));
\f
/* Entry point for global common subexpression elimination.
   F is the first instruction in the function.  */

int
gcse_main (f, file)
     rtx f;
     FILE *file;
{
  int changed, pass;
  /* Bytes used at start of pass.  */
  int initial_bytes_used;
  /* Maximum number of bytes used by a pass.  */
  int max_pass_bytes;
  /* Point to release obstack data from for each pass.  */
  char *gcse_obstack_bottom;

  /* We do not construct an accurate cfg in functions which call
     setjmp, so just punt to be safe.  */
  if (current_function_calls_setjmp)
    return 0;

  /* Assume that we do not need to run jump optimizations after gcse.  */
  run_jump_opt_after_gcse = 0;

  /* For calling dump_foo fns from gdb.  */
  debug_stderr = stderr;
  gcse_file = file;

  /* Identify the basic block information for this function, including
     successors and predecessors.  */
  max_gcse_regno = max_reg_num ();
  find_basic_blocks (f, max_gcse_regno, file, 1);

  if (file)
    dump_flow_info (file);

  /* Return if there's nothing to do.  */
  if (n_basic_blocks <= 1)
    {
      /* Free storage allocated by find_basic_blocks.  */
      free_basic_block_vars (0);
      return 0;
    }

  /* Trying to perform global optimizations on flow graphs which have
     a high connectivity will take a long time and is unlikely to be
     particularly useful.

     In normal circumstances a cfg should have about twice as many edges
     as blocks.  But we do not want to punish small functions which have
     a couple switch statements.  So we require a relatively large number
     of basic blocks and the ratio of edges to blocks to be high.  */
  if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
    {
      /* Free storage allocated by find_basic_blocks.  */
      free_basic_block_vars (0);
      return 0;
    }

  /* See what modes support reg/reg copy operations.  */
  if (! can_copy_init_p)
    {
      compute_can_copy ();
      can_copy_init_p = 1;
    }

  gcc_obstack_init (&gcse_obstack);
  bytes_used = 0;

  /* Record where pseudo-registers are set.
     This data is kept accurate during each pass.
     ??? We could also record hard-reg information here
     [since it's unchanging], however it is currently done during
     hash table computation.

     It may be tempting to compute MEM set information here too, but MEM
     sets will be subject to code motion one day and thus we need to compute
     information about memory sets when we build the hash tables.  */

  alloc_reg_set_mem (max_gcse_regno);
  compute_sets (f);

  pass = 0;
  initial_bytes_used = bytes_used;
  max_pass_bytes = 0;
  gcse_obstack_bottom = gcse_alloc (1);
  changed = 1;
  while (changed && pass < MAX_PASSES)
    {
      changed = 0;
      if (file)
	fprintf (file, "GCSE pass %d\n\n", pass + 1);

      /* Initialize bytes_used to the space for the pred/succ lists,
	 and the reg_set_table data.  */
      bytes_used = initial_bytes_used;

      /* Each pass may create new registers, so recalculate each time.  */
      max_gcse_regno = max_reg_num ();

      alloc_gcse_mem (f);

      /* Don't allow constant propagation to modify jumps
	 during this pass.  */
      changed = one_cprop_pass (pass + 1, 0);

      if (optimize_size)
	changed |= one_classic_gcse_pass (pass + 1);
      else
	{
	  changed |= one_pre_gcse_pass (pass + 1);
	  free_reg_set_mem ();
	  alloc_reg_set_mem (max_reg_num ());
	  compute_sets (f);
	  run_jump_opt_after_gcse = 1;
	}

      if (max_pass_bytes < bytes_used)
	max_pass_bytes = bytes_used;

      /* Free up memory, then reallocate for code hoisting.  We can
	 not re-use the existing allocated memory because the tables
	 will not have info for the insns or registers created by
	 partial redundancy elimination.  */
      free_gcse_mem ();

      /* It does not make sense to run code hoisting unless we are optimizing
	 for code size -- it rarely makes programs faster, and can make
	 them bigger if we did partial redundancy elimination (when optimizing
	 for space, we use a classic gcse algorithm instead of partial
	 redundancy algorithms).  */
      if (optimize_size)
	{
	  max_gcse_regno = max_reg_num ();
	  alloc_gcse_mem (f);
	  changed |= one_code_hoisting_pass ();
	  free_gcse_mem ();

	  if (max_pass_bytes < bytes_used)
	    max_pass_bytes = bytes_used;
	}

      if (file)
	{
	  fprintf (file, "\n");
	  fflush (file);
	}
      obstack_free (&gcse_obstack, gcse_obstack_bottom);
      pass++;
    }

  /* Do one last pass of copy propagation, including cprop into
     conditional jumps.  */

  max_gcse_regno = max_reg_num ();
  alloc_gcse_mem (f);
  /* This time, go ahead and allow cprop to alter jumps.  */
  one_cprop_pass (pass + 1, 1);
  free_gcse_mem ();

  if (file)
    {
      fprintf (file, "GCSE of %s: %d basic blocks, ",
	       current_function_name, n_basic_blocks);
      fprintf (file, "%d pass%s, %d bytes\n\n",
	       pass, pass > 1 ? "es" : "", max_pass_bytes);
    }

  /* Free our obstack.  */
  obstack_free (&gcse_obstack, NULL_PTR);
  /* Free reg_set_table.  */
  free_reg_set_mem ();
  /* Free storage allocated by find_basic_blocks.  */
  free_basic_block_vars (0);
  return run_jump_opt_after_gcse;
}
\f
/* Misc. utilities.  */

/* Compute which modes support reg/reg copy operations.  */

static void
compute_can_copy ()
{
  int i;
#ifndef AVOID_CCMODE_COPIES
  rtx reg, insn;
#endif
  char *free_point = (char *) oballoc (1);

  bzero (can_copy_p, NUM_MACHINE_MODES);

  start_sequence ();
  for (i = 0; i < NUM_MACHINE_MODES; i++)
    {
      switch (GET_MODE_CLASS (i))
	{
	case MODE_CC :
#ifdef AVOID_CCMODE_COPIES
	  can_copy_p[i] = 0;
#else
	  reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
	  insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
	  if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
	    can_copy_p[i] = 1;
#endif
	  break;
	default :
	  can_copy_p[i] = 1;
	  break;
	}
    }
  end_sequence ();

  /* Free the objects we just allocated.  */
  obfree (free_point);
}
\f
/* Cover function to xmalloc to record bytes allocated.  */

static char *
gmalloc (size)
     unsigned int size;
{
  bytes_used += size;
  return xmalloc (size);
}

/* Cover function to xrealloc.
   We don't record the additional size since we don't know it.
   It won't affect memory usage stats much anyway.  */

static char *
grealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  return xrealloc (ptr, size);
}

/* Cover function to obstack_alloc.
   We don't need to record the bytes allocated here since
   obstack_chunk_alloc is set to gmalloc.  */

static char *
gcse_alloc (size)
     unsigned long size;
{
  return (char *) obstack_alloc (&gcse_obstack, size);
}

/* Allocate memory for the cuid mapping array,
   and reg/memory set tracking tables.

   This is called at the start of each pass.  */

static void
alloc_gcse_mem (f)
     rtx f;
{
  int i, n;
  rtx insn;

  /* Find the largest UID and create a mapping from UIDs to CUIDs.
     CUIDs are like UIDs except they increase monotonically, have no gaps,
     and only apply to real insns.  */

  max_uid = get_max_uid ();
  n = (max_uid + 1) * sizeof (int);
  uid_cuid = (int *) gmalloc (n);
  bzero ((char *) uid_cuid, n);
  for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
    {
      if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
	INSN_CUID (insn) = i++;
      else
	INSN_CUID (insn) = i;
    }

  /* Create a table mapping cuids to insns.  */

  max_cuid = i;
  n = (max_cuid + 1) * sizeof (rtx);
  cuid_insn = (rtx *) gmalloc (n);
  bzero ((char *) cuid_insn, n);
  for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
    {
      if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
	{
	  CUID_INSN (i) = insn;
	  i++;
	}
    }

  /* Allocate vars to track sets of regs.  */

  reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);

  /* Allocate vars to track sets of regs, memory per block.  */

  reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
						       max_gcse_regno);
  mem_set_in_block = (char *) gmalloc (n_basic_blocks);
}

/* Free memory allocated by alloc_gcse_mem.  */

static void
free_gcse_mem ()
{
  free (uid_cuid);
  free (cuid_insn);

  free (reg_set_bitmap);

  free (reg_set_in_block);
  free (mem_set_in_block);
}

/* Many of the global optimization algorithms work by solving dataflow
   equations for various expressions.  Initially, some local value is
   computed for each expression in each block.  Then, the values
   across the various blocks are combined (by following flow graph
   edges) to arrive at global values.  Conceptually, each set of
   equations is independent.  We may therefore solve all the equations
   in parallel, solve them one at a time, or pick any intermediate
   approach.

   When you're going to need N two-dimensional bitmaps, each X (say,
   the number of blocks) by Y (say, the number of expressions), call
   this function.  It's not important what X and Y represent; only
   that Y correspond to the things that can be done in parallel.  This
   function will return an appropriate chunking factor C; you should
   solve C sets of equations in parallel.  By going through this
   function, we can easily trade space against time; by solving fewer
   equations in parallel we use less space.  */

static int
get_bitmap_width (n, x, y)
     int n;
     int x;
     int y;
{
  /* It's not really worth figuring out *exactly* how much memory will
     be used by a particular choice.  The important thing is to get
     something approximately right.  */
  size_t max_bitmap_memory = 10 * 1024 * 1024;

  /* The number of bytes we'd use for a single column of minimum
     width.  */
  size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);

  /* Often, it's reasonable just to solve all the equations in
     parallel.  */
  if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
    return y;

  /* Otherwise, pick the largest width we can, without going over the
     limit.  */
  return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
			     / column_size);
}
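
/* A hedged usage sketch (hypothetical caller): to solve Y independent
   equations over N bitmaps of X rows each, chunk the work as

	int width = get_bitmap_width (n, x, y);
	int start;

	for (start = 0; start < y; start += width)
	  ... solve equations START through MIN (START + WIDTH, Y) - 1 ...

   so at most WIDTH equations are in flight at once, keeping the bitmap
   memory within the budget chosen above.  */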

\f
/* Compute the local properties of each recorded expression.
   Local properties are those that are defined by the block, irrespective
   of other blocks.

   An expression is transparent in a block if its operands are not modified
   in the block.

   An expression is computed (locally available) in a block if it is computed
   at least once and the expression would contain the same value if the
   computation was moved to the end of the block.

   An expression is locally anticipatable in a block if it is computed at
   least once and the expression would contain the same value if the
   computation was moved to the beginning of the block.

   We call this routine for cprop, pre and code hoisting.  They all
   compute basically the same information and thus can easily share
   this code.

   TRANSP, COMP, and ANTLOC are destination sbitmaps for recording
   local properties.  If NULL, then it is not necessary to compute
   or record that particular property.

   SETP controls which hash table to look at.  If zero, this routine
   looks at the expr hash table; if nonzero this routine looks at
   the set hash table.  Additionally, TRANSP is computed as ~TRANSP,
   since this is really cprop's ABSALTERED.  */

static void
compute_local_properties (transp, comp, antloc, setp)
     sbitmap *transp;
     sbitmap *comp;
     sbitmap *antloc;
     int setp;
{
  int i, hash_table_size;
  struct expr **hash_table;

  /* Initialize any bitmaps that were passed in.  */
  if (transp)
    {
      if (setp)
	sbitmap_vector_zero (transp, n_basic_blocks);
      else
	sbitmap_vector_ones (transp, n_basic_blocks);
    }
  if (comp)
    sbitmap_vector_zero (comp, n_basic_blocks);
  if (antloc)
    sbitmap_vector_zero (antloc, n_basic_blocks);

  /* We use the same code for cprop, pre and hoisting.  For cprop
     we care about the set hash table, for pre and hoisting we
     care about the expr hash table.  */
  hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
  hash_table = setp ? set_hash_table : expr_hash_table;

  for (i = 0; i < hash_table_size; i++)
    {
      struct expr *expr;

      for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
	{
	  struct occr *occr;
	  int indx = expr->bitmap_index;

	  /* The expression is transparent in this block if it is not killed.
	     We start by assuming all are transparent [none are killed], and
	     then reset the bits for those that are.  */

	  if (transp)
	    compute_transp (expr->expr, indx, transp, setp);

	  /* The occurrences recorded in antic_occr are exactly those that
	     we want to set to non-zero in ANTLOC.  */

	  if (antloc)
	    {
	      for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
		{
		  int bb = BLOCK_NUM (occr->insn);
		  SET_BIT (antloc[bb], indx);

		  /* While we're scanning the table, this is a good place to
		     initialize this.  */
		  occr->deleted_p = 0;
		}
	    }

	  /* The occurrences recorded in avail_occr are exactly those that
	     we want to set to non-zero in COMP.  */
	  if (comp)
	    {
	      for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
		{
		  int bb = BLOCK_NUM (occr->insn);
		  SET_BIT (comp[bb], indx);

		  /* While we're scanning the table, this is a good place to
		     initialize this.  */
		  occr->copied_p = 0;
		}
	    }

	  /* While we're scanning the table, this is a good place to
	     initialize this.  */
	  expr->reaching_reg = 0;
	}
    }
}

\f
/* Register set information.

   `reg_set_table' records where each register is set or otherwise
   modified.  */

static struct obstack reg_set_obstack;

static void
alloc_reg_set_mem (n_regs)
     int n_regs;
{
  int n;

  reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
  n = reg_set_table_size * sizeof (struct reg_set *);
  reg_set_table = (struct reg_set **) gmalloc (n);
  bzero ((char *) reg_set_table, n);

  gcc_obstack_init (&reg_set_obstack);
}

static void
free_reg_set_mem ()
{
  free (reg_set_table);
  obstack_free (&reg_set_obstack, NULL_PTR);
}

/* Record REGNO in the reg_set table.  */

static void
record_one_set (regno, insn)
     int regno;
     rtx insn;
{
  /* Allocate a new reg_set element and link it onto the list.  */
  struct reg_set *new_reg_info, *reg_info_ptr1, *reg_info_ptr2;

  /* If the table isn't big enough, enlarge it.  */
  if (regno >= reg_set_table_size)
    {
      int new_size = regno + REG_SET_TABLE_SLOP;
      reg_set_table = (struct reg_set **)
	grealloc ((char *) reg_set_table,
		  new_size * sizeof (struct reg_set *));
      bzero ((char *) (reg_set_table + reg_set_table_size),
	     (new_size - reg_set_table_size) * sizeof (struct reg_set *));
      reg_set_table_size = new_size;
    }

  new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
						   sizeof (struct reg_set));
  bytes_used += sizeof (struct reg_set);
  new_reg_info->insn = insn;
  new_reg_info->next = NULL;
  if (reg_set_table[regno] == NULL)
    reg_set_table[regno] = new_reg_info;
  else
    {
      reg_info_ptr1 = reg_info_ptr2 = reg_set_table[regno];
      /* ??? One could keep a "last" pointer to speed this up.  */
      while (reg_info_ptr1 != NULL)
	{
	  reg_info_ptr2 = reg_info_ptr1;
	  reg_info_ptr1 = reg_info_ptr1->next;
	}
      reg_info_ptr2->next = new_reg_info;
    }
}

/* Called from compute_sets via note_stores to handle one
   SET or CLOBBER in an insn.  The DATA is really the instruction
   in which the SET is occurring.  */

static void
record_set_info (dest, setter, data)
     rtx dest, setter ATTRIBUTE_UNUSED;
     void *data;
{
  rtx record_set_insn = (rtx) data;

  if (GET_CODE (dest) == SUBREG)
    dest = SUBREG_REG (dest);

  if (GET_CODE (dest) == REG)
    {
      if (REGNO (dest) >= FIRST_PSEUDO_REGISTER)
	record_one_set (REGNO (dest), record_set_insn);
    }
}

/* Scan the function and record each set of each pseudo-register.

   This is called once, at the start of the gcse pass.
   See the comments for `reg_set_table' for further docs.  */

static void
compute_sets (f)
     rtx f;
{
  rtx insn = f;

  while (insn)
    {
      if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
	note_stores (PATTERN (insn), record_set_info, insn);
      insn = NEXT_INSN (insn);
    }
}
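
/* An editorial example of the callback protocol above: for an insn whose
   pattern is (set (reg 100) (plus (reg 1) (reg 2))), note_stores calls
   record_set_info with DEST = (reg 100), SETTER = the SET rtx, and DATA =
   the insn itself, so the set of pseudo 100 is recorded against that insn.
   (The register numbers are hypothetical.)  */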
\f
/* Hash table support.  */

#define NEVER_SET -1

/* For each register, the cuid of the first/last insn in the block to set it,
   or -1 if not set.  */
static int *reg_first_set;
static int *reg_last_set;

/* While computing "first/last set" info, this is the CUID of first/last insn
   to set memory or -1 if not set.  `mem_last_set' is also used when
   performing GCSE to record whether memory has been set since the beginning
   of the block.
   Note that handling of memory is very simple, we don't make any attempt
   to optimize things (later).  */
static int mem_first_set;
static int mem_last_set;

/* Perform a quick check whether X, the source of a set, is something
   we want to consider for GCSE.  */

static int
want_to_gcse_p (x)
     rtx x;
{
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case REG:
    case SUBREG:
    case CONST_INT:
    case CONST_DOUBLE:
    case CALL:
      return 0;

    default:
      break;
    }

  return 1;
}
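
/* For example, a SET_SRC such as (plus (reg 1) (reg 2)) is worth entering
   in the expression hash table, whereas a source that is a lone REG,
   SUBREG, CONST_INT, CONST_DOUBLE or CALL is rejected above; plain
   register copies and constant loads are the business of copy/constant
   propagation rather than GCSE.  (Editorial example; register numbers are
   hypothetical.)  */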

/* Return non-zero if the operands of expression X are unchanged from the
   start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
   or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */

static int
oprs_unchanged_p (x, insn, avail_p)
     rtx x, insn;
     int avail_p;
{
  int i;
  enum rtx_code code;
  const char *fmt;

  /* repeat is used to turn tail-recursion into iteration.  */
 repeat:

  if (x == 0)
    return 1;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      if (avail_p)
	return (reg_last_set[REGNO (x)] == NEVER_SET
		|| reg_last_set[REGNO (x)] < INSN_CUID (insn));
      else
	return (reg_first_set[REGNO (x)] == NEVER_SET
		|| reg_first_set[REGNO (x)] >= INSN_CUID (insn));

    case MEM:
      if (avail_p)
	{
	  if (mem_last_set != NEVER_SET
	      && mem_last_set >= INSN_CUID (insn))
	    return 0;
	}
      else
	{
	  if (mem_first_set != NEVER_SET
	      && mem_first_set < INSN_CUID (insn))
	    return 0;
	}
      x = XEXP (x, 0);
      goto repeat;

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
      return 0;

    case PC:
    case CC0: /*FIXME*/
    case CONST:
    case CONST_INT:
    case CONST_DOUBLE:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return 1;

    default:
      break;
    }

  i = GET_RTX_LENGTH (code) - 1;
  fmt = GET_RTX_FORMAT (code);
  for (; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  rtx tem = XEXP (x, i);

	  /* If we are about to do the last recursive call
	     needed at this level, change it into iteration.
	     This function is called enough to be worth it.  */
	  if (i == 0)
	    {
	      x = tem;
	      goto repeat;
	    }
	  if (! oprs_unchanged_p (tem, insn, avail_p))
	    return 0;
	}
      else if (fmt[i] == 'E')
	{
	  int j;
	  for (j = 0; j < XVECLEN (x, i); j++)
	    {
	      if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
		return 0;
	    }
	}
    }

  return 1;
}
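
/* An editorial example with hypothetical cuids: suppose (reg 5) is set in
   the current block only by the insn with cuid 7, so reg_first_set[5] ==
   reg_last_set[5] == 7.  An expression using (reg 5) at cuid 4 passes the
   anticipatability test above (7 >= 4: no set occurs before it), while
   the same expression at cuid 9 passes the availability test (7 < 9: no
   set occurs after it).  */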

/* Return non-zero if the operands of expression X are unchanged from
   the start of INSN's basic block up to but not including INSN.  */

static int
oprs_anticipatable_p (x, insn)
     rtx x, insn;
{
  return oprs_unchanged_p (x, insn, 0);
}

/* Return non-zero if the operands of expression X are unchanged from
   INSN to the end of INSN's basic block.  */

static int
oprs_available_p (x, insn)
     rtx x, insn;
{
  return oprs_unchanged_p (x, insn, 1);
}

/* Hash expression X.
   MODE is only used if X is a CONST_INT.
   A boolean indicating if a volatile operand is found or if the expression
   contains something we don't want to insert in the table is stored in
   DO_NOT_RECORD_P.

   ??? One might want to merge this with canon_hash.  Later.  */

static unsigned int
hash_expr (x, mode, do_not_record_p, hash_table_size)
     rtx x;
     enum machine_mode mode;
     int *do_not_record_p;
     int hash_table_size;
{
  unsigned int hash;

  *do_not_record_p = 0;

  hash = hash_expr_1 (x, mode, do_not_record_p);
  return hash % hash_table_size;
}

/* Subroutine of hash_expr to do the actual work.  */

static unsigned int
hash_expr_1 (x, mode, do_not_record_p)
     rtx x;
     enum machine_mode mode;
     int *do_not_record_p;
{
  int i, j;
  unsigned hash = 0;
  enum rtx_code code;
  const char *fmt;

  /* repeat is used to turn tail-recursion into iteration.  */
 repeat:

  if (x == 0)
    return hash;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      {
	register int regno = REGNO (x);
	hash += ((unsigned) REG << 7) + regno;
	return hash;
      }

    case CONST_INT:
      {
	unsigned HOST_WIDE_INT tem = INTVAL (x);
	hash += ((unsigned) CONST_INT << 7) + (unsigned) mode + tem;
	return hash;
      }

    case CONST_DOUBLE:
      /* This is like the general case, except that it only counts
	 the integers representing the constant.  */
      hash += (unsigned) code + (unsigned) GET_MODE (x);
      if (GET_MODE (x) != VOIDmode)
	for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
	  {
	    unsigned tem = XWINT (x, i);
	    hash += tem;
	  }
      else
	hash += ((unsigned) CONST_DOUBLE_LOW (x)
		 + (unsigned) CONST_DOUBLE_HIGH (x));
      return hash;

      /* Assume there is only one rtx object for any given label.  */
    case LABEL_REF:
      /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
	 differences and differences between each stage's debugging dumps.  */
      hash += ((unsigned) LABEL_REF << 7) + CODE_LABEL_NUMBER (XEXP (x, 0));
      return hash;

    case SYMBOL_REF:
      {
	/* Don't hash on the symbol's address to avoid bootstrap differences.
	   Different hash values may cause expressions to be recorded in
	   different orders and thus different registers to be used in the
	   final assembler.  This also avoids differences in the dump files
	   between various stages.  */
	unsigned int h = 0;
	unsigned char *p = (unsigned char *) XSTR (x, 0);
	while (*p)
	  h += (h << 7) + *p++; /* ??? revisit */
	hash += ((unsigned) SYMBOL_REF << 7) + h;
	return hash;
      }

    case MEM:
      if (MEM_VOLATILE_P (x))
	{
	  *do_not_record_p = 1;
	  return 0;
	}
      hash += (unsigned) MEM;
      hash += MEM_ALIAS_SET (x);
      x = XEXP (x, 0);
      goto repeat;

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
    case PC:
    case CC0:
    case CALL:
    case UNSPEC_VOLATILE:
      *do_not_record_p = 1;
      return 0;

    case ASM_OPERANDS:
      if (MEM_VOLATILE_P (x))
	{
	  *do_not_record_p = 1;
	  return 0;
	}

    default:
      break;
    }

  i = GET_RTX_LENGTH (code) - 1;
  hash += (unsigned) code + (unsigned) GET_MODE (x);
  fmt = GET_RTX_FORMAT (code);
  for (; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  rtx tem = XEXP (x, i);

	  /* If we are about to do the last recursive call
	     needed at this level, change it into iteration.
	     This function is called enough to be worth it.  */
	  if (i == 0)
	    {
	      x = tem;
	      goto repeat;
	    }
	  hash += hash_expr_1 (tem, 0, do_not_record_p);
	  if (*do_not_record_p)
	    return 0;
	}
      else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  {
	    hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
	    if (*do_not_record_p)
	      return 0;
	  }
      else if (fmt[i] == 's')
	{
	  register unsigned char *p = (unsigned char *) XSTR (x, i);
	  if (p)
	    while (*p)
	      hash += *p++;
	}
      else if (fmt[i] == 'i')
	{
	  register unsigned tem = XINT (x, i);
	  hash += tem;
	}
      else
	abort ();
    }

  return hash;
}

/* Hash a set of register REGNO.

   Sets are hashed on the register that is set.
   This simplifies the PRE copy propagation code.

   ??? May need to make things more elaborate.  Later, as necessary.  */

static unsigned int
hash_set (regno, hash_table_size)
     int regno;
     int hash_table_size;
{
  unsigned int hash;

  hash = regno;
  return hash % hash_table_size;
}

/* Return non-zero if exp1 is equivalent to exp2.
   ??? Borrowed from cse.c.  Might want to remerge with cse.c.  Later.  */

static int
expr_equiv_p (x, y)
     rtx x, y;
{
  register int i, j;
  register enum rtx_code code;
  register const char *fmt;

  if (x == y)
    return 1;
  if (x == 0 || y == 0)
    return x == y;

  code = GET_CODE (x);
  if (code != GET_CODE (y))
    return 0;

  /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.  */
  if (GET_MODE (x) != GET_MODE (y))
    return 0;

  switch (code)
    {
    case PC:
    case CC0:
      return x == y;

    case CONST_INT:
      return INTVAL (x) == INTVAL (y);

    case LABEL_REF:
      return XEXP (x, 0) == XEXP (y, 0);

    case SYMBOL_REF:
      return XSTR (x, 0) == XSTR (y, 0);

    case REG:
      return REGNO (x) == REGNO (y);

    case MEM:
      /* Can't merge two expressions in different alias sets, since we can
	 decide that the expression is transparent in a block when it isn't,
	 due to it being set with the different alias set.  */
      if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y))
	return 0;
      break;

    /* For commutative operations, check both orders.  */
    case PLUS:
    case MULT:
    case AND:
    case IOR:
    case XOR:
    case NE:
    case EQ:
      return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
	       && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
	      || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
		  && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));

    default:
      break;
    }

  /* Compare the elements.  If any pair of corresponding elements
     fail to match, return 0 for the whole thing.  */

  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      switch (fmt[i])
	{
	case 'e':
	  if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
	    return 0;
	  break;

	case 'E':
	  if (XVECLEN (x, i) != XVECLEN (y, i))
	    return 0;
	  for (j = 0; j < XVECLEN (x, i); j++)
	    if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
	      return 0;
	  break;

	case 's':
	  if (strcmp (XSTR (x, i), XSTR (y, i)))
	    return 0;
	  break;

	case 'i':
	  if (XINT (x, i) != XINT (y, i))
	    return 0;
	  break;

	case 'w':
	  if (XWINT (x, i) != XWINT (y, i))
	    return 0;
	  break;

	case '0':
	  break;

	default:
	  abort ();
	}
    }

  return 1;
}
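
/* For instance, the commutative case above treats (plus (reg 1) (reg 2))
   and (plus (reg 2) (reg 1)) as equivalent, while (mult:SI (reg 1) (reg 2))
   and (mult:HI (reg 1) (reg 2)) are not equivalent because their modes
   differ.  (Editorial example; register numbers are hypothetical.)  */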

/* Insert expression X in INSN in the hash table.
   If it is already present, record it as the last occurrence in INSN's
   basic block.

   MODE is the mode of the value X is being stored into.
   It is only used if X is a CONST_INT.

   ANTIC_P is non-zero if X is an anticipatable expression.
   AVAIL_P is non-zero if X is an available expression.  */

static void
insert_expr_in_table (x, mode, insn, antic_p, avail_p)
     rtx x;
     enum machine_mode mode;
     rtx insn;
     int antic_p, avail_p;
{
  int found, do_not_record_p;
  unsigned int hash;
  struct expr *cur_expr, *last_expr = NULL;
  struct occr *antic_occr, *avail_occr;
  struct occr *last_occr = NULL;

  hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);

  /* Do not insert expression in table if it contains volatile operands,
     or if hash_expr determines the expression is something we don't want
     to or can't handle.  */
  if (do_not_record_p)
    return;

  cur_expr = expr_hash_table[hash];
  found = 0;

  while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
    {
      /* If the expression isn't found, save a pointer to the end of
	 the list.  */
      last_expr = cur_expr;
      cur_expr = cur_expr->next_same_hash;
    }

  if (! found)
    {
      cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
      bytes_used += sizeof (struct expr);
      if (expr_hash_table[hash] == NULL)
	{
	  /* This is the first pattern that hashed to this index.  */
	  expr_hash_table[hash] = cur_expr;
	}
      else
	{
	  /* Add EXPR to end of this hash chain.  */
	  last_expr->next_same_hash = cur_expr;
	}
      /* Set the fields of the expr element.  */
      cur_expr->expr = x;
      cur_expr->bitmap_index = n_exprs++;
      cur_expr->next_same_hash = NULL;
      cur_expr->antic_occr = NULL;
      cur_expr->avail_occr = NULL;
    }

  /* Now record the occurrence(s).  */

  if (antic_p)
    {
      antic_occr = cur_expr->antic_occr;

      /* Search for another occurrence in the same basic block.  */
      while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
	{
	  /* If an occurrence isn't found, save a pointer to the end of
	     the list.  */
	  last_occr = antic_occr;
	  antic_occr = antic_occr->next;
	}

      if (antic_occr)
	{
	  /* Found another instance of the expression in the same basic block.
	     Prefer the currently recorded one.  We want the first one in the
	     block and the block is scanned from start to end.  */
	  ; /* nothing to do */
	}
      else
	{
	  /* First occurrence of this expression in this basic block.  */
	  antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
	  bytes_used += sizeof (struct occr);
	  /* First occurrence of this expression in any block?  */
	  if (cur_expr->antic_occr == NULL)
	    cur_expr->antic_occr = antic_occr;
	  else
	    last_occr->next = antic_occr;
	  antic_occr->insn = insn;
	  antic_occr->next = NULL;
	}
    }

  if (avail_p)
    {
      avail_occr = cur_expr->avail_occr;

      /* Search for another occurrence in the same basic block.  */
      while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
	{
	  /* If an occurrence isn't found, save a pointer to the end of
	     the list.  */
	  last_occr = avail_occr;
	  avail_occr = avail_occr->next;
	}

      if (avail_occr)
	{
	  /* Found another instance of the expression in the same basic block.
	     Prefer this occurrence to the currently recorded one.  We want
	     the last one in the block and the block is scanned from start
	     to end.  */
	  avail_occr->insn = insn;
	}
      else
	{
	  /* First occurrence of this expression in this basic block.  */
	  avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
	  bytes_used += sizeof (struct occr);
	  /* First occurrence of this expression in any block?  */
	  if (cur_expr->avail_occr == NULL)
	    cur_expr->avail_occr = avail_occr;
	  else
	    last_occr->next = avail_occr;
	  avail_occr->insn = insn;
	  avail_occr->next = NULL;
	}
    }
}
1840
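/* Shape of the resulting records (an illustrative sketch): if the
   expression (plus:SI (reg:SI 100) (const_int 4)) occurs in insns 10
   and 12 of one basic block, both anticipatable and available, it gets
   a single struct expr whose antic_occr list keeps insn 10 (the first
   occurrence in the block) and whose avail_occr list keeps insn 12
   (the last occurrence in the block).  */
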
1841/* Insert pattern X in INSN in the hash table.
1842 X is a SET of a reg to either another reg or a constant.
1843 If it is already present, record it as the last occurrence in INSN's
1844 basic block. */
1845
1846static void
1847insert_set_in_table (x, insn)
1848 rtx x;
1849 rtx insn;
1850{
1851 int found;
1852 unsigned int hash;
1853 struct expr *cur_expr, *last_expr = NULL;
1854 struct occr *cur_occr, *last_occr = NULL;
1855
1856 if (GET_CODE (x) != SET
1857 || GET_CODE (SET_DEST (x)) != REG)
1858 abort ();
1859
1860 hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
1861
1862 cur_expr = set_hash_table[hash];
1863 found = 0;
1864
1865 while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
1866 {
1867 /* If the expression isn't found, save a pointer to the end of
1868 the list. */
1869 last_expr = cur_expr;
1870 cur_expr = cur_expr->next_same_hash;
1871 }
1872
1873 if (! found)
1874 {
1875 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1876 bytes_used += sizeof (struct expr);
1877 if (set_hash_table[hash] == NULL)
1878 {
1879 /* This is the first pattern that hashed to this index. */
1880 set_hash_table[hash] = cur_expr;
1881 }
1882 else
1883 {
1884 /* Add EXPR to end of this hash chain. */
1885 last_expr->next_same_hash = cur_expr;
1886 }
1887 /* Set the fields of the expr element.
1888 We must copy X because it can be modified when copy propagation is
1889 performed on its operands. */
1890 /* ??? Should this go in a different obstack? */
1891 cur_expr->expr = copy_rtx (x);
1892 cur_expr->bitmap_index = n_sets++;
1893 cur_expr->next_same_hash = NULL;
1894 cur_expr->antic_occr = NULL;
1895 cur_expr->avail_occr = NULL;
1896 }
1897
1898 /* Now record the occurrence. */
1899
1900 cur_occr = cur_expr->avail_occr;
1901
1902 /* Search for another occurrence in the same basic block. */
1903 while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1904 {
1905 /* If an occurrence isn't found, save a pointer to the end of
1906 the list. */
1907 last_occr = cur_occr;
1908 cur_occr = cur_occr->next;
1909 }
1910
1911 if (cur_occr)
1912 {
1913 /* Found another instance of the expression in the same basic block.
1914 Prefer this occurrence to the currently recorded one. We want
1915 the last one in the block and the block is scanned from start
1916 to end. */
1917 cur_occr->insn = insn;
1918 }
1919 else
1920 {
1921 /* First occurrence of this expression in this basic block. */
1922 cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1923 bytes_used += sizeof (struct occr);
1924 /* First occurrence of this expression in any block? */
1925 if (cur_expr->avail_occr == NULL)
1926 cur_expr->avail_occr = cur_occr;
1927 else
1928 last_occr->next = cur_occr;
1929 cur_occr->insn = insn;
1930 cur_occr->next = NULL;
1931 }
1932}
1933
1934/* Scan pattern PAT of INSN and add an entry to the hash table.
1935 If SET_P is non-zero, this is for the assignment hash table,
1936 otherwise it is for the expression hash table. */
1937
1938static void
1939hash_scan_set (pat, insn, set_p)
1940 rtx pat, insn;
1941 int set_p;
1942{
1943 rtx src = SET_SRC (pat);
1944 rtx dest = SET_DEST (pat);
1945
1946 if (GET_CODE (src) == CALL)
1947 hash_scan_call (src, insn);
1948
1949 if (GET_CODE (dest) == REG)
1950 {
1951 int regno = REGNO (dest);
1952 rtx tmp;
1953
1954 /* Only record sets of pseudo-regs in the hash table. */
1955 if (! set_p
1956 && regno >= FIRST_PSEUDO_REGISTER
1957 /* Don't GCSE something if we can't do a reg/reg copy. */
1958 && can_copy_p [GET_MODE (dest)]
1959 /* Is SET_SRC something we want to gcse? */
1960 && want_to_gcse_p (src))
1961 {
1962 /* An expression is not anticipatable if its operands are
1963 modified before this insn. */
 1964	 int antic_p = oprs_anticipatable_p (src, insn);
1965 /* An expression is not available if its operands are
1966 subsequently modified, including this insn. */
1967 int avail_p = oprs_available_p (src, insn);
1968 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1969 }
1970 /* Record sets for constant/copy propagation. */
1971 else if (set_p
1972 && regno >= FIRST_PSEUDO_REGISTER
1973 && ((GET_CODE (src) == REG
1974 && REGNO (src) >= FIRST_PSEUDO_REGISTER
1975 && can_copy_p [GET_MODE (dest)])
 1976	 || GET_CODE (src) == CONST_INT
 1977	 || GET_CODE (src) == SYMBOL_REF
 1978	 || GET_CODE (src) == CONST_DOUBLE)
1979 /* A copy is not available if its src or dest is subsequently
1980 modified. Here we want to search from INSN+1 on, but
1981 oprs_available_p searches from INSN on. */
1982 && (insn == BLOCK_END (BLOCK_NUM (insn))
1983 || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1984 && oprs_available_p (pat, tmp))))
1985 insert_set_in_table (pat, insn);
1986 }
1987}
1988
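/* Illustrative examples (hypothetical register numbers): with SET_P
   zero, an insn such as
     (set (reg:SI 101) (plus:SI (reg:SI 100) (const_int 4)))
   has its SET_SRC entered in the expression hash table, provided it
   passes the want_to_gcse_p and operand tests above.  With SET_P
   non-zero, a copy or constant load such as
     (set (reg:SI 102) (reg:SI 101))
   is entered whole in the set hash table for const/copy propagation.  */
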
1989static void
1990hash_scan_clobber (x, insn)
 1991	 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1992{
1993 /* Currently nothing to do. */
1994}
1995
1996static void
1997hash_scan_call (x, insn)
 1998	 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1999{
2000 /* Currently nothing to do. */
2001}
2002
2003/* Process INSN and add hash table entries as appropriate.
2004
2005 Only available expressions that set a single pseudo-reg are recorded.
2006
2007 Single sets in a PARALLEL could be handled, but it's an extra complication
2008 that isn't dealt with right now. The trick is handling the CLOBBERs that
2009 are also in the PARALLEL. Later.
2010
2011 If SET_P is non-zero, this is for the assignment hash table,
2012 otherwise it is for the expression hash table.
2013 If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
2014 not record any expressions. */
2015
2016static void
 2017hash_scan_insn (insn, set_p, in_libcall_block)
2018 rtx insn;
2019 int set_p;
 2020	 int in_libcall_block;
2021{
2022 rtx pat = PATTERN (insn);
2023
2024 /* Pick out the sets of INSN and for other forms of instructions record
2025 what's been modified. */
2026
 2027	 if (GET_CODE (pat) == SET && ! in_libcall_block)
2028 {
2029 /* Ignore obvious no-ops. */
2030 if (SET_SRC (pat) != SET_DEST (pat))
2031 hash_scan_set (pat, insn, set_p);
2032 }
2033 else if (GET_CODE (pat) == PARALLEL)
2034 {
2035 int i;
2036
2037 for (i = 0; i < XVECLEN (pat, 0); i++)
2038 {
2039 rtx x = XVECEXP (pat, 0, i);
2040
2041 if (GET_CODE (x) == SET)
2042 {
2043 if (GET_CODE (SET_SRC (x)) == CALL)
2044 hash_scan_call (SET_SRC (x), insn);
2045 }
2046 else if (GET_CODE (x) == CLOBBER)
2047 hash_scan_clobber (x, insn);
2048 else if (GET_CODE (x) == CALL)
2049 hash_scan_call (x, insn);
2050 }
2051 }
2052 else if (GET_CODE (pat) == CLOBBER)
2053 hash_scan_clobber (pat, insn);
2054 else if (GET_CODE (pat) == CALL)
2055 hash_scan_call (pat, insn);
2056}
2057
2058static void
2059dump_hash_table (file, name, table, table_size, total_size)
2060 FILE *file;
 2061	 const char *name;
2062 struct expr **table;
2063 int table_size, total_size;
2064{
2065 int i;
2066 /* Flattened out table, so it's printed in proper order. */
2067 struct expr **flat_table;
2068 unsigned int *hash_val;
2069
2070 flat_table
2071 = (struct expr **) xcalloc (total_size, sizeof (struct expr *));
2072 hash_val = (unsigned int *) xmalloc (total_size * sizeof (unsigned int));
 2073
2074 for (i = 0; i < table_size; i++)
2075 {
2076 struct expr *expr;
2077
2078 for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
2079 {
2080 flat_table[expr->bitmap_index] = expr;
2081 hash_val[expr->bitmap_index] = i;
2082 }
2083 }
2084
2085 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
2086 name, table_size, total_size);
2087
2088 for (i = 0; i < total_size; i++)
2089 {
2090 struct expr *expr = flat_table[i];
2091
2092 fprintf (file, "Index %d (hash value %d)\n ",
2093 expr->bitmap_index, hash_val[i]);
2094 print_rtl (file, expr->expr);
2095 fprintf (file, "\n");
2096 }
2097
2098 fprintf (file, "\n");
2099
2100 /* Clean up. */
2101 free (flat_table);
2102 free (hash_val);
2103}
2104
2105/* Record register first/last/block set information for REGNO in INSN.
2106 reg_first_set records the first place in the block where the register
2107 is set and is used to compute "anticipatability".
2108 reg_last_set records the last place in the block where the register
2109 is set and is used to compute "availability".
2110 reg_set_in_block records whether the register is set in the block
2111 and is used to compute "transparency". */
2112
2113static void
2114record_last_reg_set_info (insn, regno)
2115 rtx insn;
2116 int regno;
2117{
 2118	 if (reg_first_set[regno] == NEVER_SET)
2119 reg_first_set[regno] = INSN_CUID (insn);
2120 reg_last_set[regno] = INSN_CUID (insn);
2121 SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
2122}
2123
2124/* Record memory first/last/block set information for INSN. */
2125
2126static void
2127record_last_mem_set_info (insn)
2128 rtx insn;
2129{
 2130	 if (mem_first_set == NEVER_SET)
2131 mem_first_set = INSN_CUID (insn);
2132 mem_last_set = INSN_CUID (insn);
2133 mem_set_in_block[BLOCK_NUM (insn)] = 1;
2134}
2135
 2136/* Called from compute_hash_table via note_stores to handle one
2137 SET or CLOBBER in an insn. DATA is really the instruction in which
2138 the SET is taking place. */
2139
2140static void
 2141record_last_set_info (dest, setter, data)
 2142	 rtx dest, setter ATTRIBUTE_UNUSED;
 2143	 void *data;
 2144{
2145 rtx last_set_insn = (rtx) data;
2146
2147 if (GET_CODE (dest) == SUBREG)
2148 dest = SUBREG_REG (dest);
2149
2150 if (GET_CODE (dest) == REG)
2151 record_last_reg_set_info (last_set_insn, REGNO (dest));
2152 else if (GET_CODE (dest) == MEM
2153 /* Ignore pushes, they clobber nothing. */
2154 && ! push_operand (dest, GET_MODE (dest)))
2155 record_last_mem_set_info (last_set_insn);
2156}
2157
2158/* Top level function to create an expression or assignment hash table.
2159
2160 Expression entries are placed in the hash table if
2161 - they are of the form (set (pseudo-reg) src),
2162 - src is something we want to perform GCSE on,
2163 - none of the operands are subsequently modified in the block
2164
2165 Assignment entries are placed in the hash table if
2166 - they are of the form (set (pseudo-reg) src),
2167 - src is something we want to perform const/copy propagation on,
2168 - none of the operands or target are subsequently modified in the block
2169 Currently src must be a pseudo-reg or a const_int.
2170
2171 F is the first insn.
2172 SET_P is non-zero for computing the assignment hash table. */
2173
2174static void
 2175compute_hash_table (set_p)
2176 int set_p;
2177{
2178 int bb;
2179
2180 /* While we compute the hash table we also compute a bit array of which
2181 registers are set in which blocks.
2182 We also compute which blocks set memory, in the absence of aliasing
2183 support [which is TODO].
2184 ??? This isn't needed during const/copy propagation, but it's cheap to
2185 compute. Later. */
2186 sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2187 bzero ((char *) mem_set_in_block, n_basic_blocks);
2188
2189 /* Some working arrays used to track first and last set in each block. */
2190 /* ??? One could use alloca here, but at some size a threshold is crossed
2191 beyond which one should use malloc. Are we at that threshold here? */
2192 reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2193 reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2194
2195 for (bb = 0; bb < n_basic_blocks; bb++)
2196 {
2197 rtx insn;
2198 int regno;
ed79bb3d 2199 int in_libcall_block;
b86ba9c8 2200 int i;
2201
2202 /* First pass over the instructions records information used to
2203 determine when registers and memory are first and last set.
2204 ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2205 could be moved to compute_sets since they currently don't change. */
2206
2207 for (i = 0; i < max_gcse_regno; i++)
2208 reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2209 mem_first_set = NEVER_SET;
2210 mem_last_set = NEVER_SET;
 2211
2212 for (insn = BLOCK_HEAD (bb);
2213 insn && insn != NEXT_INSN (BLOCK_END (bb));
2214 insn = NEXT_INSN (insn))
2215 {
2216#ifdef NON_SAVING_SETJMP
2217 if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2218 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2219 {
2220 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2221 record_last_reg_set_info (insn, regno);
2222 continue;
2223 }
2224#endif
2225
2226 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
2227 continue;
2228
2229 if (GET_CODE (insn) == CALL_INSN)
2230 {
2231 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2232 if ((call_used_regs[regno]
2233 && regno != STACK_POINTER_REGNUM
2234#if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2235 && regno != HARD_FRAME_POINTER_REGNUM
2236#endif
2237#if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2238 && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2239#endif
2240#if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2241 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2242#endif
2243
2244 && regno != FRAME_POINTER_REGNUM)
2245 || global_regs[regno])
2246 record_last_reg_set_info (insn, regno);
2247 if (! CONST_CALL_P (insn))
2248 record_last_mem_set_info (insn);
2249 }
2250
 2251	 note_stores (PATTERN (insn), record_last_set_info, insn);
2252 }
2253
2254 /* The next pass builds the hash table. */
2255
2256 for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2257 insn && insn != NEXT_INSN (BLOCK_END (bb));
2258 insn = NEXT_INSN (insn))
2259 {
2260 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
2261 {
2262 if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2263 in_libcall_block = 1;
2264 else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2265 in_libcall_block = 0;
2266 hash_scan_insn (insn, set_p, in_libcall_block);
2267 }
2268 }
2269 }
2270
2271 free (reg_first_set);
2272 free (reg_last_set);
2273 /* Catch bugs early. */
2274 reg_first_set = reg_last_set = 0;
2275}
2276
2277/* Allocate space for the set hash table.
2278 N_INSNS is the number of instructions in the function.
2279 It is used to determine the number of buckets to use. */
2280
2281static void
2282alloc_set_hash_table (n_insns)
2283 int n_insns;
2284{
2285 int n;
2286
2287 set_hash_table_size = n_insns / 4;
2288 if (set_hash_table_size < 11)
2289 set_hash_table_size = 11;
2290 /* Attempt to maintain efficient use of hash table.
2291 Making it an odd number is simplest for now.
2292 ??? Later take some measurements. */
2293 set_hash_table_size |= 1;
2294 n = set_hash_table_size * sizeof (struct expr *);
2295 set_hash_table = (struct expr **) gmalloc (n);
2296}
2297
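/* Worked sizing example (illustrative): a 100-insn function gets
   100/4 = 25 buckets, which is already >= 11 and already odd after
   the |= 1.  A 20-insn function gets 20/4 = 5, which is raised to
   the minimum of 11.  */
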
2298/* Free things allocated by alloc_set_hash_table. */
2299
2300static void
2301free_set_hash_table ()
2302{
2303 free (set_hash_table);
2304}
2305
2306/* Compute the hash table for doing copy/const propagation. */
2307
2308static void
 2309compute_set_hash_table ()
2310{
2311 /* Initialize count of number of entries in hash table. */
2312 n_sets = 0;
2313 bzero ((char *) set_hash_table, set_hash_table_size * sizeof (struct expr *));
2314
 2315	 compute_hash_table (1);
2316}
2317
2318/* Allocate space for the expression hash table.
2319 N_INSNS is the number of instructions in the function.
2320 It is used to determine the number of buckets to use. */
2321
2322static void
2323alloc_expr_hash_table (n_insns)
2324 int n_insns;
2325{
2326 int n;
2327
2328 expr_hash_table_size = n_insns / 2;
2329 /* Make sure the amount is usable. */
2330 if (expr_hash_table_size < 11)
2331 expr_hash_table_size = 11;
2332 /* Attempt to maintain efficient use of hash table.
2333 Making it an odd number is simplest for now.
2334 ??? Later take some measurements. */
2335 expr_hash_table_size |= 1;
2336 n = expr_hash_table_size * sizeof (struct expr *);
2337 expr_hash_table = (struct expr **) gmalloc (n);
2338}
2339
2340/* Free things allocated by alloc_expr_hash_table. */
2341
2342static void
2343free_expr_hash_table ()
2344{
2345 free (expr_hash_table);
2346}
2347
2348/* Compute the hash table for doing GCSE. */
2349
2350static void
 2351compute_expr_hash_table ()
2352{
2353 /* Initialize count of number of entries in hash table. */
2354 n_exprs = 0;
2355 bzero ((char *) expr_hash_table, expr_hash_table_size * sizeof (struct expr *));
2356
 2357	 compute_hash_table (0);
2358}
2359\f
2360/* Expression tracking support. */
2361
2362/* Lookup pattern PAT in the expression table.
2363 The result is a pointer to the table entry, or NULL if not found. */
2364
2365static struct expr *
2366lookup_expr (pat)
2367 rtx pat;
2368{
2369 int do_not_record_p;
2370 unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2371 expr_hash_table_size);
2372 struct expr *expr;
2373
2374 if (do_not_record_p)
2375 return NULL;
2376
2377 expr = expr_hash_table[hash];
2378
2379 while (expr && ! expr_equiv_p (expr->expr, pat))
2380 expr = expr->next_same_hash;
2381
2382 return expr;
2383}
2384
2385/* Lookup REGNO in the set table.
2386 If PAT is non-NULL look for the entry that matches it, otherwise return
2387 the first entry for REGNO.
2388 The result is a pointer to the table entry, or NULL if not found. */
2389
2390static struct expr *
2391lookup_set (regno, pat)
2392 int regno;
2393 rtx pat;
2394{
2395 unsigned int hash = hash_set (regno, set_hash_table_size);
2396 struct expr *expr;
2397
2398 expr = set_hash_table[hash];
2399
2400 if (pat)
2401 {
2402 while (expr && ! expr_equiv_p (expr->expr, pat))
2403 expr = expr->next_same_hash;
2404 }
2405 else
2406 {
2407 while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2408 expr = expr->next_same_hash;
2409 }
2410
2411 return expr;
2412}
2413
2414/* Return the next entry for REGNO in list EXPR. */
2415
2416static struct expr *
2417next_set (regno, expr)
2418 int regno;
2419 struct expr *expr;
2420{
2421 do
2422 expr = expr->next_same_hash;
2423 while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2424 return expr;
2425}
2426
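/* Usage sketch (illustrative; `process' is a hypothetical callback):
   to visit every recorded set of REGNO,

     struct expr *set;
     for (set = lookup_set (regno, NULL_RTX);
          set != NULL;
          set = next_set (regno, set))
       process (set->expr);

   find_avail_set below walks the chain in essentially this way.  */
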
2427/* Reset tables used to keep track of what's still available [since the
2428 start of the block]. */
2429
2430static void
2431reset_opr_set_tables ()
2432{
2433 /* Maintain a bitmap of which regs have been set since beginning of
2434 the block. */
2435 sbitmap_zero (reg_set_bitmap);
2436 /* Also keep a record of the last instruction to modify memory.
2437 For now this is very trivial, we only record whether any memory
2438 location has been modified. */
2439 mem_last_set = 0;
2440}
2441
2442/* Return non-zero if the operands of X are not set before INSN in
2443 INSN's basic block. */
2444
2445static int
2446oprs_not_set_p (x, insn)
2447 rtx x, insn;
2448{
2449 int i;
2450 enum rtx_code code;
 2451	 const char *fmt;
2452
2453 /* repeat is used to turn tail-recursion into iteration. */
2454repeat:
2455
2456 if (x == 0)
2457 return 1;
2458
2459 code = GET_CODE (x);
2460 switch (code)
2461 {
2462 case PC:
2463 case CC0:
2464 case CONST:
2465 case CONST_INT:
2466 case CONST_DOUBLE:
2467 case SYMBOL_REF:
2468 case LABEL_REF:
2469 case ADDR_VEC:
2470 case ADDR_DIFF_VEC:
2471 return 1;
2472
2473 case MEM:
2474 if (mem_last_set != 0)
2475 return 0;
2476 x = XEXP (x, 0);
2477 goto repeat;
2478
2479 case REG:
2480 return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2481
2482 default:
2483 break;
2484 }
2485
2486 fmt = GET_RTX_FORMAT (code);
2487 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2488 {
2489 if (fmt[i] == 'e')
2490 {
2491 int not_set_p;
2492 /* If we are about to do the last recursive call
2493 needed at this level, change it into iteration.
2494 This function is called enough to be worth it. */
2495 if (i == 0)
2496 {
2497 x = XEXP (x, 0);
2498 goto repeat;
2499 }
2500 not_set_p = oprs_not_set_p (XEXP (x, i), insn);
2501 if (! not_set_p)
2502 return 0;
2503 }
2504 else if (fmt[i] == 'E')
2505 {
2506 int j;
2507 for (j = 0; j < XVECLEN (x, i); j++)
2508 {
2509 int not_set_p = oprs_not_set_p (XVECEXP (x, i, j), insn);
2510 if (! not_set_p)
2511 return 0;
2512 }
2513 }
2514 }
2515
2516 return 1;
2517}
2518
2519/* Mark things set by a CALL. */
2520
2521static void
 2522mark_call (insn)
 2523	 rtx insn;
2524{
2525 mem_last_set = INSN_CUID (insn);
2526}
2527
2528/* Mark things set by a SET. */
2529
2530static void
2531mark_set (pat, insn)
2532 rtx pat, insn;
2533{
2534 rtx dest = SET_DEST (pat);
2535
2536 while (GET_CODE (dest) == SUBREG
2537 || GET_CODE (dest) == ZERO_EXTRACT
2538 || GET_CODE (dest) == SIGN_EXTRACT
2539 || GET_CODE (dest) == STRICT_LOW_PART)
2540 dest = XEXP (dest, 0);
2541
2542 if (GET_CODE (dest) == REG)
2543 SET_BIT (reg_set_bitmap, REGNO (dest));
2544 else if (GET_CODE (dest) == MEM)
2545 mem_last_set = INSN_CUID (insn);
2546
2547 if (GET_CODE (SET_SRC (pat)) == CALL)
 2548	 mark_call (insn);
2549}
2550
2551/* Record things set by a CLOBBER. */
2552
2553static void
2554mark_clobber (pat, insn)
2555 rtx pat, insn;
2556{
2557 rtx clob = XEXP (pat, 0);
2558
2559 while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2560 clob = XEXP (clob, 0);
2561
2562 if (GET_CODE (clob) == REG)
2563 SET_BIT (reg_set_bitmap, REGNO (clob));
2564 else
2565 mem_last_set = INSN_CUID (insn);
2566}
2567
2568/* Record things set by INSN.
2569 This data is used by oprs_not_set_p. */
2570
2571static void
2572mark_oprs_set (insn)
2573 rtx insn;
2574{
2575 rtx pat = PATTERN (insn);
2576
2577 if (GET_CODE (pat) == SET)
2578 mark_set (pat, insn);
2579 else if (GET_CODE (pat) == PARALLEL)
2580 {
2581 int i;
2582
2583 for (i = 0; i < XVECLEN (pat, 0); i++)
2584 {
2585 rtx x = XVECEXP (pat, 0, i);
2586
2587 if (GET_CODE (x) == SET)
2588 mark_set (x, insn);
2589 else if (GET_CODE (x) == CLOBBER)
2590 mark_clobber (x, insn);
2591 else if (GET_CODE (x) == CALL)
 2592	 mark_call (insn);
2593 }
2594 }
2595 else if (GET_CODE (pat) == CLOBBER)
2596 mark_clobber (pat, insn);
2597 else if (GET_CODE (pat) == CALL)
 2598	 mark_call (insn);
 2599}
 2600
2601\f
2602/* Classic GCSE reaching definition support. */
2603
2604/* Allocate reaching def variables. */
2605
2606static void
2607alloc_rd_mem (n_blocks, n_insns)
2608 int n_blocks, n_insns;
2609{
2610 rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2611 sbitmap_vector_zero (rd_kill, n_basic_blocks);
2612
2613 rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2614 sbitmap_vector_zero (rd_gen, n_basic_blocks);
2615
2616 reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2617 sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2618
2619 rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2620 sbitmap_vector_zero (rd_out, n_basic_blocks);
2621}
2622
2623/* Free reaching def variables. */
2624
2625static void
2626free_rd_mem ()
2627{
2628 free (rd_kill);
2629 free (rd_gen);
2630 free (reaching_defs);
2631 free (rd_out);
2632}
2633
2634/* Add INSN to the kills of BB.
2635 REGNO, set in BB, is killed by INSN. */
2636
2637static void
2638handle_rd_kill_set (insn, regno, bb)
2639 rtx insn;
2640 int regno, bb;
2641{
2642 struct reg_set *this_reg = reg_set_table[regno];
2643
2644 while (this_reg)
2645 {
2646 if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2647 SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2648 this_reg = this_reg->next;
2649 }
2650}
2651
 2652/* Compute the set of kills for reaching definitions. */
2653
2654static void
2655compute_kill_rd ()
2656{
2657 int bb,cuid;
2658
2659 /* For each block
 2660	 For each set bit in `gen' of the block (i.e. each insn which
2661 generates a definition in the block)
2662 Call the reg set by the insn corresponding to that bit regx
2663 Look at the linked list starting at reg_set_table[regx]
2664 For each setting of regx in the linked list, which is not in
2665 this block
2666 Set the bit in `kill' corresponding to that insn
2667 */
2668
2669 for (bb = 0; bb < n_basic_blocks; bb++)
2670 {
2671 for (cuid = 0; cuid < max_cuid; cuid++)
2672 {
2673 if (TEST_BIT (rd_gen[bb], cuid))
 2674	 {
2675 rtx insn = CUID_INSN (cuid);
2676 rtx pat = PATTERN (insn);
2677
2678 if (GET_CODE (insn) == CALL_INSN)
 2679	 {
2680 int regno;
2681
2682 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
 2683	 {
2684 if ((call_used_regs[regno]
2685 && regno != STACK_POINTER_REGNUM
2686#if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2687 && regno != HARD_FRAME_POINTER_REGNUM
2688#endif
2689#if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2690 && ! (regno == ARG_POINTER_REGNUM
2691 && fixed_regs[regno])
2692#endif
2693#if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2694 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2695#endif
2696 && regno != FRAME_POINTER_REGNUM)
2697 || global_regs[regno])
 2698	 handle_rd_kill_set (insn, regno, bb);
2699 }
2700 }
2701
2702 if (GET_CODE (pat) == PARALLEL)
2703 {
2704 int i;
2705
2706 /* We work backwards because ... */
2707 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2708 {
2709 enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2710 if ((code == SET || code == CLOBBER)
2711 && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2712 handle_rd_kill_set (insn,
2713 REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2714 bb);
2715 }
2716 }
2717 else if (GET_CODE (pat) == SET)
2718 {
2719 if (GET_CODE (SET_DEST (pat)) == REG)
2720 {
2721 /* Each setting of this register outside of this block
2722 must be marked in the set of kills in this block. */
2723 handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2724 }
 2725	 }
 2726	 /* FIXME: CLOBBER? */
 2727	 }
2728 }
2729 }
2730}
2731
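/* Worked example (hypothetical insns): if reg 5 is set by insn 10 in
   block 2 and by insn 20 in block 4, then the call for insn 10 sets
   the bit for insn 20 in rd_kill[2], and the call for insn 20 sets
   the bit for insn 10 in rd_kill[4]: each definition kills every
   definition of the same register made outside its own block.  */
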
2732/* Compute the reaching definitions as in
2733 Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2734 Chapter 10. It is the same algorithm as used for computing available
2735 expressions but applied to the gens and kills of reaching definitions. */
2736
2737static void
2738compute_rd ()
2739{
2740 int bb, changed, passes;
2741
2742 for (bb = 0; bb < n_basic_blocks; bb++)
2743 sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2744
2745 passes = 0;
2746 changed = 1;
2747 while (changed)
2748 {
2749 changed = 0;
2750 for (bb = 0; bb < n_basic_blocks; bb++)
 2751	 {
 2752	 sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb);
2753 changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2754 reaching_defs[bb], rd_kill[bb]);
 2755	 }
2756 passes++;
2757 }
2758
2759 if (gcse_file)
2760 fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2761}
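
/* Restated as dataflow equations (an informal sketch of the loop above):

     reaching_defs[bb] = union of rd_out[p] over all predecessors p of bb
     rd_out[bb]        = rd_gen[bb] | (reaching_defs[bb] & ~rd_kill[bb])

   iterated until no rd_out bit changes.  */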
2762\f
2763/* Classic GCSE available expression support. */
2764
2765/* Allocate memory for available expression computation. */
2766
2767static void
2768alloc_avail_expr_mem (n_blocks, n_exprs)
2769 int n_blocks, n_exprs;
2770{
2771 ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2772 sbitmap_vector_zero (ae_kill, n_basic_blocks);
2773
2774 ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2775 sbitmap_vector_zero (ae_gen, n_basic_blocks);
2776
2777 ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2778 sbitmap_vector_zero (ae_in, n_basic_blocks);
2779
2780 ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2781 sbitmap_vector_zero (ae_out, n_basic_blocks);
2782
2783 u_bitmap = (sbitmap) sbitmap_alloc (n_exprs);
2784 sbitmap_ones (u_bitmap);
2785}
2786
2787static void
2788free_avail_expr_mem ()
2789{
2790 free (ae_kill);
2791 free (ae_gen);
2792 free (ae_in);
2793 free (ae_out);
2794 free (u_bitmap);
2795}
2796
2797/* Compute the set of available expressions generated in each basic block. */
2798
2799static void
2800compute_ae_gen ()
2801{
2802 int i;
2803
2804 /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2805 This is all we have to do because an expression is not recorded if it
2806 is not available, and the only expressions we want to work with are the
2807 ones that are recorded. */
2808
2809 for (i = 0; i < expr_hash_table_size; i++)
2810 {
2811 struct expr *expr = expr_hash_table[i];
2812 while (expr != NULL)
2813 {
2814 struct occr *occr = expr->avail_occr;
2815 while (occr != NULL)
2816 {
2817 SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
2818 occr = occr->next;
2819 }
2820 expr = expr->next_same_hash;
2821 }
2822 }
2823}
2824
2825/* Return non-zero if expression X is killed in BB. */
2826
2827static int
2828expr_killed_p (x, bb)
2829 rtx x;
2830 int bb;
2831{
2832 int i;
2833 enum rtx_code code;
 2834	 const char *fmt;
2835
2836 /* repeat is used to turn tail-recursion into iteration. */
2837 repeat:
2838
2839 if (x == 0)
2840 return 1;
2841
2842 code = GET_CODE (x);
2843 switch (code)
2844 {
2845 case REG:
2846 return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2847
2848 case MEM:
2849 if (mem_set_in_block[bb])
2850 return 1;
2851 x = XEXP (x, 0);
2852 goto repeat;
2853
2854 case PC:
2855 case CC0: /*FIXME*/
2856 case CONST:
2857 case CONST_INT:
2858 case CONST_DOUBLE:
2859 case SYMBOL_REF:
2860 case LABEL_REF:
2861 case ADDR_VEC:
2862 case ADDR_DIFF_VEC:
2863 return 0;
2864
2865 default:
2866 break;
2867 }
2868
2869 i = GET_RTX_LENGTH (code) - 1;
2870 fmt = GET_RTX_FORMAT (code);
2871 for (; i >= 0; i--)
2872 {
2873 if (fmt[i] == 'e')
2874 {
2875 rtx tem = XEXP (x, i);
2876
2877 /* If we are about to do the last recursive call
2878 needed at this level, change it into iteration.
2879 This function is called enough to be worth it. */
2880 if (i == 0)
2881 {
2882 x = tem;
2883 goto repeat;
2884 }
2885 if (expr_killed_p (tem, bb))
2886 return 1;
2887 }
2888 else if (fmt[i] == 'E')
2889 {
2890 int j;
2891 for (j = 0; j < XVECLEN (x, i); j++)
2892 {
2893 if (expr_killed_p (XVECEXP (x, i, j), bb))
2894 return 1;
2895 }
2896 }
2897 }
2898
2899 return 0;
2900}
2901
2902/* Compute the set of available expressions killed in each basic block. */
2903
2904static void
2905compute_ae_kill (ae_gen, ae_kill)
2906 sbitmap *ae_gen, *ae_kill;
2907{
2908 int bb,i;
2909
2910 for (bb = 0; bb < n_basic_blocks; bb++)
2911 {
2912 for (i = 0; i < expr_hash_table_size; i++)
2913 {
2914 struct expr *expr = expr_hash_table[i];
2915
2916 for ( ; expr != NULL; expr = expr->next_same_hash)
2917 {
2918 /* Skip EXPR if generated in this block. */
2919 if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2920 continue;
2921
2922 if (expr_killed_p (expr->expr, bb))
2923 SET_BIT (ae_kill[bb], expr->bitmap_index);
2924 }
2925 }
2926 }
2927}
2928\f
2929/* Actually perform the Classic GCSE optimizations. */
2930
2931/* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2932
2933 CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2934 as a positive reach. We want to do this when there are two computations
2935 of the expression in the block.
2936
2937 VISITED is a pointer to a working buffer for tracking which BB's have
2938 been visited. It is NULL for the top-level call.
2939
2940 We treat reaching expressions that go through blocks containing the same
2941 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2942 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2943 2 as not reaching. The intent is to improve the probability of finding
2944 only one reaching expression and to reduce register lifetimes by picking
2945 the closest such expression. */
2946
2947static int
 2948expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited)
2949 struct occr *occr;
2950 struct expr *expr;
2951 int bb;
2952 int check_self_loop;
2953 char *visited;
2954{
 2955	 edge pred;
 2956
 2957	 for (pred = BASIC_BLOCK(bb)->pred; pred != NULL; pred = pred->pred_next)
 2958	 {
 2959	 int pred_bb = pred->src->index;
2960
2961 if (visited[pred_bb])
 2962	 {
2963 /* This predecessor has already been visited.
2964 Nothing to do. */
2965 ;
2966 }
2967 else if (pred_bb == bb)
 2968	 {
2969 /* BB loops on itself. */
2970 if (check_self_loop
2971 && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2972 && BLOCK_NUM (occr->insn) == pred_bb)
2973 return 1;
2974 visited[pred_bb] = 1;
 2975	 }
2976 /* Ignore this predecessor if it kills the expression. */
2977 else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2978 visited[pred_bb] = 1;
2979 /* Does this predecessor generate this expression? */
2980 else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2981 {
2982 /* Is this the occurrence we're looking for?
2983 Note that there's only one generating occurrence per block
2984 so we just need to check the block number. */
2985 if (BLOCK_NUM (occr->insn) == pred_bb)
2986 return 1;
2987 visited[pred_bb] = 1;
2988 }
2989 /* Neither gen nor kill. */
2990 else
 2991	 {
 2992	 visited[pred_bb] = 1;
2993 if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop,
2994 visited))
 2995	 return 1;
 2996	 }
2997 }
2998
2999 /* All paths have been checked. */
3000 return 0;
3001}
3002
3003/* This wrapper for expr_reaches_here_p_work() is to ensure that any
3004 memory allocated for that function is returned. */
3005
3006static int
3007expr_reaches_here_p (occr, expr, bb, check_self_loop)
3008 struct occr *occr;
3009 struct expr *expr;
3010 int bb;
3011 int check_self_loop;
3012{
3013 int rval;
3014 char * visited = (char *) xcalloc (n_basic_blocks, 1);
3015
3016 rval = expr_reaches_here_p_work(occr, expr, bb, check_self_loop, visited);
3017
3018 free (visited);
3019
3020 return (rval);
3021}
3022
3023/* Return the instruction that computes EXPR that reaches INSN's basic block.
3024 If there is more than one such instruction, return NULL.
3025
3026 Called only by handle_avail_expr. */
3027
3028static rtx
3029computing_insn (expr, insn)
3030 struct expr *expr;
3031 rtx insn;
3032{
3033 int bb = BLOCK_NUM (insn);
3034
3035 if (expr->avail_occr->next == NULL)
3036 {
3037 if (BLOCK_NUM (expr->avail_occr->insn) == bb)
3038 {
3039 /* The available expression is actually itself
3040 (i.e. a loop in the flow graph) so do nothing. */
3041 return NULL;
3042 }
3043 /* (FIXME) Case that we found a pattern that was created by
3044 a substitution that took place. */
3045 return expr->avail_occr->insn;
3046 }
3047 else
3048 {
3049 /* Pattern is computed more than once.
3050 Search backwards from this insn to see how many of these
3051 computations actually reach this insn. */
3052 struct occr *occr;
3053 rtx insn_computes_expr = NULL;
3054 int can_reach = 0;
3055
3056 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
3057 {
3058 if (BLOCK_NUM (occr->insn) == bb)
3059 {
3060 /* The expression is generated in this block.
3061 The only time we care about this is when the expression
3062 is generated later in the block [and thus there's a loop].
3063 We let the normal cse pass handle the other cases. */
3064 if (INSN_CUID (insn) < INSN_CUID (occr->insn))
3065 {
 3066	 if (expr_reaches_here_p (occr, expr, bb, 1))
3067 {
3068 can_reach++;
3069 if (can_reach > 1)
3070 return NULL;
3071 insn_computes_expr = occr->insn;
3072 }
3073 }
3074 }
3075 else /* Computation of the pattern outside this block. */
3076 {
 3077	 if (expr_reaches_here_p (occr, expr, bb, 0))
3078 {
3079 can_reach++;
3080 if (can_reach > 1)
3081 return NULL;
3082 insn_computes_expr = occr->insn;
3083 }
3084 }
3085 }
3086
3087 if (insn_computes_expr == NULL)
3088 abort ();
3089 return insn_computes_expr;
3090 }
3091}
3092
3093/* Return non-zero if the definition in DEF_INSN can reach INSN.
3094 Only called by can_disregard_other_sets. */
3095
3096static int
3097def_reaches_here_p (insn, def_insn)
3098 rtx insn, def_insn;
3099{
3100 rtx reg;
3101
3102 if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
3103 return 1;
3104
3105 if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
3106 {
3107 if (INSN_CUID (def_insn) < INSN_CUID (insn))
 3108	 {
3109 if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
3110 return 1;
3111 if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
3112 reg = XEXP (PATTERN (def_insn), 0);
3113 else if (GET_CODE (PATTERN (def_insn)) == SET)
3114 reg = SET_DEST (PATTERN (def_insn));
3115 else
3116 abort ();
3117 return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
3118 }
3119 else
3120 return 0;
3121 }
3122
3123 return 0;
3124}
3125
3126/* Return non-zero if *ADDR_THIS_REG can only have one value at INSN.
3127 The value returned is the number of definitions that reach INSN.
3128 Returning a value of zero means that [maybe] more than one definition
3129 reaches INSN and the caller can't perform whatever optimization it is
3130 trying. i.e. it is always safe to return zero. */
3131
3132static int
3133can_disregard_other_sets (addr_this_reg, insn, for_combine)
3134 struct reg_set **addr_this_reg;
3135 rtx insn;
3136 int for_combine;
3137{
3138 int number_of_reaching_defs = 0;
3139 struct reg_set *this_reg = *addr_this_reg;
3140
3141 while (this_reg)
3142 {
3143 if (def_reaches_here_p (insn, this_reg->insn))
3144 {
3145 number_of_reaching_defs++;
3146 /* Ignore parallels for now. */
3147 if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
3148 return 0;
3149 if (!for_combine
3150 && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3151 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3152 SET_SRC (PATTERN (insn)))))
3153 {
3154 /* A setting of the reg to a different value reaches INSN. */
3155 return 0;
3156 }
3157 if (number_of_reaching_defs > 1)
3158 {
3159 /* If in this setting the value the register is being
3160 set to is equal to the previous value the register
3161 was set to and this setting reaches the insn we are
3162 trying to do the substitution on then we are ok. */
3163
3164 if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3165 return 0;
3166 if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3167 SET_SRC (PATTERN (insn))))
3168 return 0;
3169 }
3170 *addr_this_reg = this_reg;
3171 }
3172
3173 /* prev_this_reg = this_reg; */
3174 this_reg = this_reg->next;
3175 }
3176
3177 return number_of_reaching_defs;
3178}
3179
3180/* Expression computed by insn is available and the substitution is legal,
3181 so try to perform the substitution.
3182
3183 The result is non-zero if any changes were made. */
3184
3185static int
3186handle_avail_expr (insn, expr)
3187 rtx insn;
3188 struct expr *expr;
3189{
3190 rtx pat, insn_computes_expr;
3191 rtx to;
3192 struct reg_set *this_reg;
3193 int found_setting, use_src;
3194 int changed = 0;
3195
3196 /* We only handle the case where one computation of the expression
3197 reaches this instruction. */
3198 insn_computes_expr = computing_insn (expr, insn);
3199 if (insn_computes_expr == NULL)
3200 return 0;
3201
3202 found_setting = 0;
3203 use_src = 0;
3204
3205 /* At this point we know only one computation of EXPR outside of this
3206 block reaches this insn. Now try to find a register that the
3207 expression is computed into. */
3208
3209 if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3210 {
3211 /* This is the case when the available expression that reaches
3212 here has already been handled as an available expression. */
3213 int regnum_for_replacing = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3214 /* If the register was created by GCSE we can't use `reg_set_table',
3215 however we know it's set only once. */
3216 if (regnum_for_replacing >= max_gcse_regno
3217 /* If the register the expression is computed into is set only once,
3218 or only one set reaches this insn, we can use it. */
3219 || (((this_reg = reg_set_table[regnum_for_replacing]),
3220 this_reg->next == NULL)
3221 || can_disregard_other_sets (&this_reg, insn, 0)))
3222 {
3223 use_src = 1;
3224 found_setting = 1;
3225 }
3226 }
3227
3228 if (!found_setting)
3229 {
3230 int regnum_for_replacing = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3231 /* This shouldn't happen. */
3232 if (regnum_for_replacing >= max_gcse_regno)
3233 abort ();
3234 this_reg = reg_set_table[regnum_for_replacing];
3235 /* If the register the expression is computed into is set only once,
3236 or only one set reaches this insn, use it. */
3237 if (this_reg->next == NULL
3238 || can_disregard_other_sets (&this_reg, insn, 0))
3239 found_setting = 1;
3240 }
3241
3242 if (found_setting)
3243 {
3244 pat = PATTERN (insn);
3245 if (use_src)
3246 to = SET_SRC (PATTERN (insn_computes_expr));
3247 else
3248 to = SET_DEST (PATTERN (insn_computes_expr));
3249 changed = validate_change (insn, &SET_SRC (pat), to, 0);
3250
3251 /* We should be able to ignore the return code from validate_change but
3252 to play it safe we check. */
3253 if (changed)
3254 {
3255 gcse_subst_count++;
3256 if (gcse_file != NULL)
3257 {
3258 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d %s insn %d\n",
3259 INSN_UID (insn), REGNO (to),
3260 use_src ? "from" : "set in",
3261 INSN_UID (insn_computes_expr));
3262 }
3263
3264 }
3265 }
3266 /* The register that the expr is computed into is set more than once. */
3267 else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3268 {
 3269	 /* Insert an insn after INSNX that copies the reg set in INSNX
 3270	 into a new pseudo register; call this new register REGN.
 3271	 From INSNB until the end of the basic block, or until REGB is
 3272	 set, replace all uses of REGB with REGN. */
3273 rtx new_insn;
3274
3275 to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3276
3277 /* Generate the new insn. */
3278 /* ??? If the change fails, we return 0, even though we created
3279 an insn. I think this is ok. */
3280 new_insn
3281 = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3282 SET_DEST (PATTERN (insn_computes_expr))),
3283 insn_computes_expr);
3284 /* Keep block number table up to date. */
3285 set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3286 /* Keep register set table up to date. */
3287 record_one_set (REGNO (to), new_insn);
3288
3289 gcse_create_count++;
3290 if (gcse_file != NULL)
 3291	 {
3292 fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d, computed in insn %d,\n",
3293 INSN_UID (NEXT_INSN (insn_computes_expr)),
3294 REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))),
3295 INSN_UID (insn_computes_expr));
3296 fprintf (gcse_file, " into newly allocated reg %d\n", REGNO (to));
 3297	 }
3298
3299 pat = PATTERN (insn);
3300
3301 /* Do register replacement for INSN. */
3302 changed = validate_change (insn, &SET_SRC (pat),
3303 SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr))),
3304 0);
3305
3306 /* We should be able to ignore the return code from validate_change but
3307 to play it safe we check. */
3308 if (changed)
3309 {
3310 gcse_subst_count++;
3311 if (gcse_file != NULL)
3312 {
3313 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d set in insn %d\n",
3314 INSN_UID (insn),
3315 REGNO (SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr)))),
3316 INSN_UID (insn_computes_expr));
3317 }
3318
3319 }
3320 }
3321
3322 return changed;
3323}
3324
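/* Illustrative transformation (hypothetical insn and register numbers):
   if
     insn 10: (set (reg:SI 101) (plus:SI (reg:SI 100) (const_int 4)))
   is the only computation of the expression reaching
     insn 30: (set (reg:SI 105) (plus:SI (reg:SI 100) (const_int 4)))
   then the found_setting case rewrites insn 30 as
     (set (reg:SI 105) (reg:SI 101))
   while the other case first copies reg 101 into a fresh pseudo right
   after insn 10 and replaces the source of insn 30 with that pseudo.  */
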
3325/* Perform classic GCSE.
3326 This is called by one_classic_gcse_pass after all the dataflow analysis
3327 has been done.
3328
3329 The result is non-zero if a change was made. */
3330
3331static int
3332classic_gcse ()
3333{
3334 int bb, changed;
3335 rtx insn;
3336
3337 /* Note we start at block 1. */
3338
3339 changed = 0;
3340 for (bb = 1; bb < n_basic_blocks; bb++)
3341 {
3342 /* Reset tables used to keep track of what's still valid [since the
3343 start of the block]. */
3344 reset_opr_set_tables ();
3345
3346 for (insn = BLOCK_HEAD (bb);
3347 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3348 insn = NEXT_INSN (insn))
3349 {
3350 /* Is insn of form (set (pseudo-reg) ...)? */
3351
3352 if (GET_CODE (insn) == INSN
3353 && GET_CODE (PATTERN (insn)) == SET
3354 && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3355 && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3356 {
3357 rtx pat = PATTERN (insn);
3358 rtx src = SET_SRC (pat);
3359 struct expr *expr;
3360
3361 if (want_to_gcse_p (src)
3362 /* Is the expression recorded? */
3363 && ((expr = lookup_expr (src)) != NULL)
3364 /* Is the expression available [at the start of the
3365 block]? */
3366 && TEST_BIT (ae_in[bb], expr->bitmap_index)
3367 /* Are the operands unchanged since the start of the
3368 block? */
3369 && oprs_not_set_p (src, insn))
3370 changed |= handle_avail_expr (insn, expr);
3371 }
3372
3373 /* Keep track of everything modified by this insn. */
3374 /* ??? Need to be careful w.r.t. mods done to INSN. */
3375 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3376 mark_oprs_set (insn);
 3377	 }
3378 }
3379
3380 return changed;
3381}
3382
3383/* Top level routine to perform one classic GCSE pass.
3384
3385 Return non-zero if a change was made. */
3386
3387static int
 3388one_classic_gcse_pass (pass)
3389 int pass;
3390{
3391 int changed = 0;
3392
3393 gcse_subst_count = 0;
3394 gcse_create_count = 0;
3395
3396 alloc_expr_hash_table (max_cuid);
3397 alloc_rd_mem (n_basic_blocks, max_cuid);
 3398	 compute_expr_hash_table ();
3399 if (gcse_file)
3400 dump_hash_table (gcse_file, "Expression", expr_hash_table,
3401 expr_hash_table_size, n_exprs);
3402 if (n_exprs > 0)
3403 {
3404 compute_kill_rd ();
3405 compute_rd ();
3406 alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3407 compute_ae_gen ();
 3408	 compute_ae_kill (ae_gen, ae_kill);
 3409	 compute_available (ae_gen, ae_kill, ae_out, ae_in);
3410 changed = classic_gcse ();
3411 free_avail_expr_mem ();
3412 }
3413 free_rd_mem ();
3414 free_expr_hash_table ();
3415
3416 if (gcse_file)
3417 {
3418 fprintf (gcse_file, "\n");
3419 fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
3420 current_function_name, pass,
3421 bytes_used, gcse_subst_count, gcse_create_count);
3422 }
3423
3424 return changed;
3425}
3426\f
3427/* Compute copy/constant propagation working variables. */
3428
3429/* Local properties of assignments. */
3430
3431static sbitmap *cprop_pavloc;
3432static sbitmap *cprop_absaltered;
3433
3434/* Global properties of assignments (computed from the local properties). */
3435
3436static sbitmap *cprop_avin;
3437static sbitmap *cprop_avout;
3438
3439/* Allocate vars used for copy/const propagation.
3440 N_BLOCKS is the number of basic blocks.
3441 N_SETS is the number of sets. */
3442
3443static void
3444alloc_cprop_mem (n_blocks, n_sets)
3445 int n_blocks, n_sets;
3446{
3447 cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3448 cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3449
3450 cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3451 cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3452}
3453
3454/* Free vars used by copy/const propagation. */
3455
3456static void
3457free_cprop_mem ()
3458{
3459 free (cprop_pavloc);
3460 free (cprop_absaltered);
3461 free (cprop_avin);
3462 free (cprop_avout);
3463}
3464
3465/* For each block, compute whether X is transparent.
3466 X is either an expression or an assignment [though we don't care which,
3467 for this context an assignment is treated as an expression].
3468 For each block where an element of X is modified, set (SET_P == 1) or reset
3469 (SET_P == 0) the INDX bit in BMAP. */
3470
3471static void
3472compute_transp (x, indx, bmap, set_p)
3473 rtx x;
3474 int indx;
3475 sbitmap *bmap;
3476 int set_p;
3477{
3478 int bb,i;
3479 enum rtx_code code;
 3480	 const char *fmt;
3481
3482 /* repeat is used to turn tail-recursion into iteration. */
3483 repeat:
3484
3485 if (x == 0)
3486 return;
3487
3488 code = GET_CODE (x);
3489 switch (code)
3490 {
3491 case REG:
3492 {
3493 reg_set *r;
3494 int regno = REGNO (x);
3495
3496 if (set_p)
3497 {
3498 if (regno < FIRST_PSEUDO_REGISTER)
3499 {
3500 for (bb = 0; bb < n_basic_blocks; bb++)
3501 if (TEST_BIT (reg_set_in_block[bb], regno))
3502 SET_BIT (bmap[bb], indx);
3503 }
3504 else
3505 {
3506 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3507 {
3508 bb = BLOCK_NUM (r->insn);
3509 SET_BIT (bmap[bb], indx);
3510 }
3511 }
3512 }
3513 else
3514 {
3515 if (regno < FIRST_PSEUDO_REGISTER)
3516 {
3517 for (bb = 0; bb < n_basic_blocks; bb++)
3518 if (TEST_BIT (reg_set_in_block[bb], regno))
3519 RESET_BIT (bmap[bb], indx);
3520 }
3521 else
3522 {
3523 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3524 {
3525 bb = BLOCK_NUM (r->insn);
3526 RESET_BIT (bmap[bb], indx);
3527 }
3528 }
3529 }
3530 return;
3531 }
3532
3533 case MEM:
3534 if (set_p)
3535 {
3536 for (bb = 0; bb < n_basic_blocks; bb++)
3537 if (mem_set_in_block[bb])
3538 SET_BIT (bmap[bb], indx);
3539 }
3540 else
3541 {
3542 for (bb = 0; bb < n_basic_blocks; bb++)
3543 if (mem_set_in_block[bb])
3544 RESET_BIT (bmap[bb], indx);
3545 }
3546 x = XEXP (x, 0);
3547 goto repeat;
3548
3549 case PC:
3550 case CC0: /*FIXME*/
3551 case CONST:
3552 case CONST_INT:
3553 case CONST_DOUBLE:
3554 case SYMBOL_REF:
3555 case LABEL_REF:
3556 case ADDR_VEC:
3557 case ADDR_DIFF_VEC:
3558 return;
3559
3560 default:
3561 break;
3562 }
3563
3564 i = GET_RTX_LENGTH (code) - 1;
3565 fmt = GET_RTX_FORMAT (code);
3566 for (; i >= 0; i--)
3567 {
3568 if (fmt[i] == 'e')
3569 {
3570 rtx tem = XEXP (x, i);
3571
3572 /* If we are about to do the last recursive call
3573 needed at this level, change it into iteration.
3574 This function is called enough to be worth it. */
3575 if (i == 0)
3576 {
3577 x = tem;
3578 goto repeat;
3579 }
3580 compute_transp (tem, indx, bmap, set_p);
3581 }
3582 else if (fmt[i] == 'E')
3583 {
3584 int j;
3585 for (j = 0; j < XVECLEN (x, i); j++)
3586 compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3587 }
3588 }
3589}
3590
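/* Example (illustrative): if reg 100 is set in blocks 3 and 7, then
   for any expression or assignment mentioning reg 100, the REG case
   above sets (SET_P == 1) or resets (SET_P == 0) the INDX bit in
   bmap[3] and bmap[7], recording that the expression is not
   transparent in those blocks.  */
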
3591/* Top level routine to do the dataflow analysis needed by copy/const
3592 propagation. */
3593
3594static void
3595compute_cprop_data ()
3596{
 3597	 compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
3598 compute_available (cprop_pavloc, cprop_absaltered,
3599 cprop_avout, cprop_avin);
3600}
3601\f
3602/* Copy/constant propagation. */
3603
3604/* Maximum number of register uses in an insn that we handle. */
3605#define MAX_USES 8
3606
3607/* Table of uses found in an insn.
3608 Allocated statically to avoid alloc/free complexity and overhead. */
3609static struct reg_use reg_use_table[MAX_USES];
3610
3611/* Index into `reg_use_table' while building it. */
3612static int reg_use_count;
3613
3614/* Set up a list of register numbers used in INSN.
3615 The found uses are stored in `reg_use_table'.
3616 `reg_use_count' is initialized to zero before entry, and
3617 contains the number of uses in the table upon exit.
3618
3619 ??? If a register appears multiple times we will record it multiple
3620 times. This doesn't hurt anything but it will slow things down. */
3621
3622static void
3623find_used_regs (x)
3624 rtx x;
3625{
3626 int i;
3627 enum rtx_code code;
 3628	 const char *fmt;
3629
3630 /* repeat is used to turn tail-recursion into iteration. */
3631 repeat:
3632
3633 if (x == 0)
3634 return;
3635
3636 code = GET_CODE (x);
3637 switch (code)
3638 {
3639 case REG:
3640 if (reg_use_count == MAX_USES)
3641 return;
3642 reg_use_table[reg_use_count].reg_rtx = x;
3643 reg_use_count++;
3644 return;
3645
3646 case MEM:
3647 x = XEXP (x, 0);
3648 goto repeat;
3649
3650 case PC:
3651 case CC0:
3652 case CONST:
3653 case CONST_INT:
3654 case CONST_DOUBLE:
3655 case SYMBOL_REF:
3656 case LABEL_REF:
3657 case CLOBBER:
3658 case ADDR_VEC:
3659 case ADDR_DIFF_VEC:
3660 case ASM_INPUT: /*FIXME*/
3661 return;
3662
3663 case SET:
3664 if (GET_CODE (SET_DEST (x)) == MEM)
3665 find_used_regs (SET_DEST (x));
3666 x = SET_SRC (x);
3667 goto repeat;
3668
3669 default:
3670 break;
3671 }
3672
3673 /* Recursively scan the operands of this expression. */
3674
3675 fmt = GET_RTX_FORMAT (code);
3676 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3677 {
3678 if (fmt[i] == 'e')
3679 {
3680 /* If we are about to do the last recursive call
3681 needed at this level, change it into iteration.
3682 This function is called enough to be worth it. */
3683 if (i == 0)
3684 {
3685 x = XEXP (x, 0);
3686 goto repeat;
3687 }
3688 find_used_regs (XEXP (x, i));
3689 }
3690 else if (fmt[i] == 'E')
3691 {
3692 int j;
3693 for (j = 0; j < XVECLEN (x, i); j++)
3694 find_used_regs (XVECEXP (x, i, j));
3695 }
3696 }
3697}
3698
3699/* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
 3700	 Returns non-zero if successful. */
3701
3702static int
3703try_replace_reg (from, to, insn)
3704 rtx from, to, insn;
3705{
3706 rtx note;
3707 rtx src;
3708 int success;
3709 rtx set;
3710
3711 note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3712
3713 if (!note)
3714 note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3715
3716 /* If this fails we could try to simplify the result of the
3717 replacement and attempt to recognize the simplified insn.
3718
3719 But we need a general simplify_rtx that doesn't have pass
3720 specific state variables. I'm not aware of one at the moment. */
3721
3722
3723 success = validate_replace_src (from, to, insn);
3724 set = single_set (insn);
3725
3726 /* We failed to do the replacement.  Try to add a REG_EQUAL note so we
3727 do not lose the information. */
3728 if (!success && !note)
3729 {
3730 if (!set)
3731 return 0;
3732 note = REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL,
3733 copy_rtx (SET_SRC (set)),
3734 REG_NOTES (insn));
3735 }
3736
3737 /* Always do the replacement in REG_EQUAL and REG_EQUIV notes.  Also
3738 try to simplify them. */
3739 if (note)
3740 {
3741 rtx simplified;
3742 src = XEXP (note, 0);
3743 replace_rtx (src, from, to);
3744
3745 /* Try to simplify resulting note. */
3746 simplified = simplify_rtx (src);
3747 if (simplified)
3748 {
3749 src = simplified;
3750 XEXP (note, 0) = src;
3751 }
3752
3753 /* A REG_EQUAL note may get simplified into a plain register; we do
3754 not allow that, so remove the note.  This ought not to happen,
3755 because the code above should have synthesized a reg-reg move,
3756 but be on the safe side. */
3757 else if (REG_P (src))
3758 remove_note (insn, note);
3759 }
3760 return success;
3761}
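/* Editorial worked example (mine): suppose (reg 4) is known to be 7 and
   INSN is

     (set (reg 5) (plus (reg 4) (const_int 1)))

   If validate_replace_src cannot rewrite the pattern (say the target lacks
   a matching add-immediate form), the code above still attaches a
   REG_EQUAL note holding the source, substitutes 7 into the note, and
   simplifies it to (const_int 8), so the constant fact survives for later
   passes even though the insn itself is unchanged.  */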
3762/* Find a set of REGNO that is available on entry to INSN's block.
3763 Returns NULL if not found. */
3764
3765static struct expr *
3766find_avail_set (regno, insn)
3767 int regno;
3768 rtx insn;
3769{
3770 /* SET1 contains the last set found that can be returned to the caller for
3771 use in a substitution. */
3772 struct expr *set1 = 0;
3773
3774 /* Loops are not possible here. To get a loop we would need two sets
3775 available at the start of the block containing INSN, i.e. we would
3776 need two sets like this available at the start of the block:
3777
3778 (set (reg X) (reg Y))
3779 (set (reg Y) (reg X))
3780
3781 This cannot happen since the set of (reg Y) would have killed the
3782 set of (reg X) making it unavailable at the start of this block. */
3783 while (1)
3784 {
3785 rtx src;
3786 struct expr *set = lookup_set (regno, NULL_RTX);
3787
3788 /* Find a set that is available at the start of the block
3789 which contains INSN. */
3790 while (set)
3791 {
3792 if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3793 break;
3794 set = next_set (regno, set);
3795 }
3796
3797 /* If no available set was found we've reached the end of the
3798 (possibly empty) copy chain. */
3799 if (set == 0)
3800 break;
3801
3802 if (GET_CODE (set->expr) != SET)
3803 abort ();
3804
3805 src = SET_SRC (set->expr);
3806
3807 /* We know the set is available.
3808 Now check that SRC is ANTLOC (i.e. none of the source operands
3809 have changed since the start of the block).
3810
3811 If the source operand changed, we may still use it for the next
3812 iteration of this loop, but we may not use it for substitutions. */
3813 if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
3814 set1 = set;
3815
3816 /* If the source of the set is anything except a register, then
3817 we have reached the end of the copy chain. */
3818 if (GET_CODE (src) != REG)
3819 break;
3820
3821 /* Follow the copy chain, i.e. start another iteration of the loop
3822 and see if we have an available copy into SRC. */
3823 regno = REGNO (src);
3824 }
3825
3826 /* SET1 holds the last set that was available and anticipatable at
3827 INSN. */
3828 return set1;
3829}
3830
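/* Editorial worked example (mine): with both of these sets available at
   the start of INSN's block,

     (set (reg 2) (const_int 7))
     (set (reg 3) (reg 2))

   find_avail_set (3, insn) records the copy from (reg 2), follows the
   chain to regno 2, and finally returns the constant set, so cprop_insn
   can substitute 7 for uses of (reg 3).  */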
3831/* Subroutine of cprop_insn that tries to propagate constants into
3832 JUMP_INSNS. INSN must be a conditional jump; COPY is a copy of it
3833 that we can use for substitutions.
3834 REG_USED is the use we will try to replace, SRC is the constant we
3835 will try to substitute for it.
3836 Returns nonzero if a change was made. */
3837static int
3838cprop_jump (insn, copy, reg_used, src)
3839 rtx insn, copy;
3840 struct reg_use *reg_used;
3841 rtx src;
3842{
3843 rtx set = PATTERN (copy);
3844 rtx temp;
3845
3846 /* Replace the register with the appropriate constant. */
3847 replace_rtx (SET_SRC (set), reg_used->reg_rtx, src);
3848
3849 temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)),
3850 GET_MODE (SET_SRC (set)),
3851 GET_MODE (XEXP (SET_SRC (set), 0)),
3852 XEXP (SET_SRC (set), 0),
3853 XEXP (SET_SRC (set), 1),
3854 XEXP (SET_SRC (set), 2));
3855
3856 /* If no simplification can be made, then try the next
3857 register. */
3858 if (temp == 0)
3859 return 0;
3860
3861 SET_SRC (set) = temp;
3862
3863 /* That may have changed the structure of TEMP, so
3864 force it to be rerecognized if it has not turned
3865 into a nop or unconditional jump. */
3866
3867 INSN_CODE (copy) = -1;
3868 if ((SET_DEST (set) == pc_rtx
3869 && (SET_SRC (set) == pc_rtx
3870 || GET_CODE (SET_SRC (set)) == LABEL_REF))
3871 || recog (PATTERN (copy), copy, NULL) >= 0)
3872 {
3873 /* This has either become an unconditional jump
3874 or a nop-jump. We'd like to delete nop jumps
3875 here, but doing so confuses gcse. So we just
3876 make the replacement and let later passes
3877 sort things out. */
3878 PATTERN (insn) = set;
3879 INSN_CODE (insn) = -1;
3880
3881 /* One less use of the label this insn used to jump to
3882 if we turned this into a NOP jump. */
3883 if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0)
3884 --LABEL_NUSES (JUMP_LABEL (insn));
3885
3886 /* If this has turned into an unconditional jump,
3887 then put a barrier after it so that the unreachable
3888 code will be deleted. */
3889 if (GET_CODE (SET_SRC (set)) == LABEL_REF)
3890 emit_barrier_after (insn);
3891
3892 run_jump_opt_after_gcse = 1;
3893
3894 const_prop_count++;
3895 if (gcse_file != NULL)
3896 {
3897 int regno = REGNO (reg_used->reg_rtx);
3898 fprintf (gcse_file, "CONST-PROP: Replacing reg %d in insn %d with constant ",
3899 regno, INSN_UID (insn));
3900 print_rtl (gcse_file, src);
3901 fprintf (gcse_file, "\n");
3902 }
3903 return 1;
3904 }
3905 return 0;
3906}
3907
3908#ifdef HAVE_cc0
3909/* Subroutine of cprop_insn that tries to propagate constants into
3910 JUMP_INSNS for machines that have CC0. INSN is a single set that
3911 stores into CC0; the insn following it is a conditional jump.
3912 REG_USED is the use we will try to replace, SRC is the constant we
3913 will try to substitute for it.
3914 Returns nonzero if a change was made. */
3915static int
3916cprop_cc0_jump (insn, reg_used, src)
3917 rtx insn;
3918 struct reg_use *reg_used;
3919 rtx src;
3920{
3921 rtx jump = NEXT_INSN (insn);
3922 rtx copy = copy_rtx (jump);
3923 rtx set = PATTERN (copy);
3924
3925 /* We need to copy the source of the cc0 setter, as cprop_jump is going to
3926 substitute into it. */
3927 replace_rtx (SET_SRC (set), cc0_rtx, copy_rtx (SET_SRC (PATTERN (insn))));
3928 if (! cprop_jump (jump, copy, reg_used, src))
3929 return 0;
3930
3931 /* If we succeeded, delete the cc0 setter. */
3932 PUT_CODE (insn, NOTE);
3933 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3934 NOTE_SOURCE_FILE (insn) = 0;
3935 return 1;
3936 }
3937#endif
3938
3939/* Perform constant and copy propagation on INSN.
3940 The result is non-zero if a change was made. */
3941
3942static int
3943cprop_insn (insn, alter_jumps)
3944 rtx insn;
3945 int alter_jumps;
3946{
3947 struct reg_use *reg_used;
3948 int changed = 0;
3949 rtx note;
3950
3951 /* Only propagate into SETs. Note that a conditional jump is a
3952 SET with pc_rtx as the destination. */
3953 if ((GET_CODE (insn) != INSN
3954 && GET_CODE (insn) != JUMP_INSN)
3955 || GET_CODE (PATTERN (insn)) != SET)
3956 return 0;
3957
3958 reg_use_count = 0;
3959 find_used_regs (PATTERN (insn));
3960
3961 note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
3962 if (!note)
3963 note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3964
3965 /* We may win even when propagating constants into notes. */
3966 if (note)
3967 find_used_regs (XEXP (note, 0));
3968
3969 reg_used = &reg_use_table[0];
3970 for ( ; reg_use_count > 0; reg_used++, reg_use_count--)
3971 {
3972 rtx pat, src;
3973 struct expr *set;
3974 int regno = REGNO (reg_used->reg_rtx);
3975
3976 /* Ignore registers created by GCSE.
3977 We do this because ... */
3978 if (regno >= max_gcse_regno)
3979 continue;
3980
3981 /* If the register has already been set in this block, there's
3982 nothing we can do. */
3983 if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3984 continue;
3985
3986 /* Find an assignment that sets reg_used and is available
3987 at the start of the block. */
3988 set = find_avail_set (regno, insn);
3989 if (! set)
3990 continue;
3991
3992 pat = set->expr;
3993 /* ??? We might be able to handle PARALLELs. Later. */
3994 if (GET_CODE (pat) != SET)
3995 abort ();
3996 src = SET_SRC (pat);
3997
3998 /* Constant propagation. */
3999 if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE
4000 || GET_CODE (src) == SYMBOL_REF)
4001 {
4002 /* Handle normal insns first. */
4003 if (GET_CODE (insn) == INSN
4004 && try_replace_reg (reg_used->reg_rtx, src, insn))
4005 {
4006 changed = 1;
4007 const_prop_count++;
4008 if (gcse_file != NULL)
4009 {
4010 fprintf (gcse_file, "CONST-PROP: Replacing reg %d in insn %d with constant ",
4011 regno, INSN_UID (insn));
4012 print_rtl (gcse_file, src);
4013 fprintf (gcse_file, "\n");
4014 }
4015
4016 /* The original insn setting reg_used may or may not now be
4017 deletable. We leave the deletion to flow. */
4018 }
4019
4020 /* Try to propagate a CONST_INT into a conditional jump.
4021 We're pretty specific about what we will handle in this
4022 code, we can extend this as necessary over time.
4023
4024 Right now the insn in question must look like
4025 (set (pc) (if_then_else ...)) */
4026 else if (alter_jumps
4027 && GET_CODE (insn) == JUMP_INSN
4028 && condjump_p (insn)
4029 && ! simplejump_p (insn))
4030 changed |= cprop_jump (insn, copy_rtx (insn), reg_used, src);
4031#ifdef HAVE_cc0
4032 /* Similar code for machines that use a pair of CC0 setter and
4033 conditional jump insn. */
4034 else if (alter_jumps
4035 && GET_CODE (PATTERN (insn)) == SET
4036 && SET_DEST (PATTERN (insn)) == cc0_rtx
4037 && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
4038 && condjump_p (NEXT_INSN (insn))
4039 && ! simplejump_p (NEXT_INSN (insn)))
4040 changed |= cprop_cc0_jump (insn, reg_used, src);
4041#endif
4042 }
4043 else if (GET_CODE (src) == REG
4044 && REGNO (src) >= FIRST_PSEUDO_REGISTER
4045 && REGNO (src) != regno)
4046 {
4047 if (try_replace_reg (reg_used->reg_rtx, src, insn))
4048 {
4049 changed = 1;
4050 copy_prop_count++;
4051 if (gcse_file != NULL)
4052 {
4053 fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d with reg %d\n",
4054 regno, INSN_UID (insn), REGNO (src));
4055 }
4056
4057 /* The original insn setting reg_used may or may not now be
4058 deletable. We leave the deletion to flow. */
4059 /* FIXME: If it turns out that the insn isn't deletable,
4060 then we may have unnecessarily extended register lifetimes
4061 and made things worse. */
4062 }
4063 }
4064 }
4065
4066 return changed;
4067}
4068
4069/* Forward propagate copies.
4070 This includes copies and constants.
4071 Return non-zero if a change was made. */
4072
4073static int
4074cprop (alter_jumps)
4075 int alter_jumps;
4076{
4077 int bb, changed;
4078 rtx insn;
4079
4080 /* Note we start at block 1. */
4081
4082 changed = 0;
4083 for (bb = 1; bb < n_basic_blocks; bb++)
4084 {
4085 /* Reset tables used to keep track of what's still valid [since the
4086 start of the block]. */
4087 reset_opr_set_tables ();
4088
4089 for (insn = BLOCK_HEAD (bb);
4090 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
4091 insn = NEXT_INSN (insn))
4092 {
4093 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4094 {
4095 changed |= cprop_insn (insn, alter_jumps);
4096
4097 /* Keep track of everything modified by this insn. */
4098 /* ??? Need to be careful w.r.t. mods done to INSN. Don't
4099 call mark_oprs_set if we turned the insn into a NOTE. */
4100 if (GET_CODE (insn) != NOTE)
4101 mark_oprs_set (insn);
4102 }
4103 }
4104 }
4105
4106 if (gcse_file != NULL)
4107 fprintf (gcse_file, "\n");
4108
4109 return changed;
4110}
4111
4112/* Perform one copy/constant propagation pass.
4113 F is the first insn in the function.
4114 PASS is the pass count. */
4115
4116static int
4117one_cprop_pass (pass, alter_jumps)
4118 int pass;
4119 int alter_jumps;
4120{
4121 int changed = 0;
4122
4123 const_prop_count = 0;
4124 copy_prop_count = 0;
4125
4126 alloc_set_hash_table (max_cuid);
4127 compute_set_hash_table ();
4128 if (gcse_file)
4129 dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
4130 n_sets);
4131 if (n_sets > 0)
4132 {
4133 alloc_cprop_mem (n_basic_blocks, n_sets);
4134 compute_cprop_data ();
4135 changed = cprop (alter_jumps);
4136 free_cprop_mem ();
4137 }
4138 free_set_hash_table ();
4139
4140 if (gcse_file)
4141 {
4142 fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, %d const props, %d copy props\n",
4143 current_function_name, pass,
4144 bytes_used, const_prop_count, copy_prop_count);
4145 fprintf (gcse_file, "\n");
4146 }
4147
4148 return changed;
4149}
4150\f
4151/* Compute PRE+LCM working variables. */
4152
4153/* Local properties of expressions. */
4154/* Nonzero for expressions that are transparent in the block. */
4155static sbitmap *transp;
4156
4157/* Nonzero for expressions that are transparent at the end of the block.
4158 This is only zero for expressions killed by an abnormal critical
4159 edge created by a call. */
4160static sbitmap *transpout;
4161
4162/* Nonzero for expressions that are computed (available) in the block. */
4163static sbitmap *comp;
4164
4165/* Nonzero for expressions that are locally anticipatable in the block. */
4166static sbitmap *antloc;
4167
4168/* Nonzero for expressions where this block is an optimal computation
4169 point. */
4170static sbitmap *pre_optimal;
4171
4172/* Nonzero for expressions which are redundant in a particular block. */
4173static sbitmap *pre_redundant;
4174
4175/* Nonzero for expressions which should be inserted on a specific edge. */
4176static sbitmap *pre_insert_map;
4177
4178/* Nonzero for expressions which should be deleted in a specific block. */
4179static sbitmap *pre_delete_map;
4180
4181/* Contains the edge_list returned by pre_edge_lcm. */
4182static struct edge_list *edge_list;
4183
4184static sbitmap *temp_bitmap;
4185
4186/* Redundant insns. */
4187static sbitmap pre_redundant_insns;
4188
4189/* Allocate vars used for PRE analysis. */
4190
4191static void
4192alloc_pre_mem (n_blocks, n_exprs)
4193 int n_blocks, n_exprs;
4194{
4195 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4196 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4197 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4198 temp_bitmap = sbitmap_vector_alloc (n_blocks, n_exprs);
4199
4200 pre_optimal = NULL;
4201 pre_redundant = NULL;
4202 pre_insert_map = NULL;
4203 pre_delete_map = NULL;
4204 ae_in = NULL;
4205 ae_out = NULL;
4206 u_bitmap = NULL;
4207 transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
4208 ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
4209 /* pre_insert and pre_delete are allocated later. */
4210}
4211
4212/* Free vars used for PRE analysis. */
4213
4214static void
4215free_pre_mem ()
4216{
4217 free (transp);
4218 free (comp);
4219 free (antloc);
4220 free (temp_bitmap);
4221
4222 if (pre_optimal)
4223 free (pre_optimal);
4224 if (pre_redundant)
4225 free (pre_redundant);
4226 if (pre_insert_map)
4227 free (pre_insert_map);
4228 if (pre_delete_map)
4229 free (pre_delete_map);
4230 if (transpout)
4231 free (transpout);
4232
4233 if (ae_in)
4234 free (ae_in);
4235 if (ae_out)
4236 free (ae_out);
4237 if (ae_kill)
4238 free (ae_kill);
4239 if (u_bitmap)
4240 free (u_bitmap);
4241
4242 transp = comp = antloc = NULL;
4243 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
4244 transpout = ae_in = ae_out = ae_kill = NULL;
4245 u_bitmap = NULL;
4246
4247}
4248
4249/* Top level routine to do the dataflow analysis needed by PRE. */
4250
4251static void
4252compute_pre_data ()
4253{
4254 compute_local_properties (transp, comp, antloc, 0);
4255 compute_transpout ();
4256 sbitmap_vector_zero (ae_kill, n_basic_blocks);
4257 compute_ae_kill (comp, ae_kill);
4258 edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
4259 ae_kill, &pre_insert_map, &pre_delete_map);
4260}
4261
4262\f
4263/* PRE utilities */
4264
4265/* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
4266 block BB.
4267
4268 VISITED is a pointer to a working buffer for tracking which BB's have
4269 been visited. It is NULL for the top-level call.
4270
4271 We treat reaching expressions that go through blocks containing the same
4272 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
4273 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4274 2 as not reaching. The intent is to improve the probability of finding
4275 only one reaching expression and to reduce register lifetimes by picking
4276 the closest such expression. */
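/* Editorial worked example (mine): with CFG 2 -> 3 -> 4 and EXPR generated
   in both blocks 2 and 3, a query asking whether the occurrence in block 2
   reaches block 4 stops at block 3, which generates EXPR itself; only the
   closer occurrence in block 3 is treated as reaching block 4.  */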
4277
4278static int
4279pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited)
4280 int occr_bb;
4281 struct expr *expr;
4282 int bb;
4283 char *visited;
4284{
4285 edge pred;
4286
4287 for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
4288 {
4289 int pred_bb = pred->src->index;
4290
4291 if (pred->src == ENTRY_BLOCK_PTR
4292 /* Has this predecessor already been visited? */
4293 || visited[pred_bb])
4294 {
4295 /* Nothing to do. */
4296 }
4297 /* Does this predecessor generate this expression? */
4298 else if (TEST_BIT (comp[pred_bb], expr->bitmap_index))
4299 {
4300 /* Is this the occurrence we're looking for?
4301 Note that there's only one generating occurrence per block
4302 so we just need to check the block number. */
4303 if (occr_bb == pred_bb)
4304 return 1;
4305 visited[pred_bb] = 1;
4306 }
4307 /* Ignore this predecessor if it kills the expression. */
4308 else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index))
4309 visited[pred_bb] = 1;
4310 /* Neither gen nor kill. */
4311 else
4312 {
4313 visited[pred_bb] = 1;
4314 if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
4315 return 1;
4316 }
4317 }
4318
4319 /* All paths have been checked. */
4320 return 0;
4321}
4322
4323/* The wrapper for pre_expr_reaches_here_p_work that ensures that any
4324 memory allocated for that function is returned. */
4325
4326static int
4327pre_expr_reaches_here_p (occr_bb, expr, bb)
4328 int occr_bb;
4329 struct expr *expr;
4330 int bb;
4331{
4332 int rval;
4333 char * visited = (char *) xcalloc (n_basic_blocks, 1);
4334
4335 rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
4336
4337 free (visited);
4338
4339 return (rval);
4340}
7506f491 4341\f
4342
4343/* Given an expr, generate the RTL (a possibly multi-insn SEQUENCE) that
4344 computes the expression into its reaching register, suitable for
4345 insertion at the end of a basic block or on an edge. */
4346
4347static rtx
4348process_insert_insn (expr)
4349 struct expr *expr;
4350{
4351 rtx reg = expr->reaching_reg;
4352 rtx pat, copied_expr;
4353 rtx first_new_insn;
4354
4355 start_sequence ();
4356 copied_expr = copy_rtx (expr->expr);
4357 emit_move_insn (reg, copied_expr);
4358 first_new_insn = get_insns ();
4359 pat = gen_sequence ();
4360 end_sequence ();
4361
4362 return pat;
4363}
4364
4365/* Add EXPR to the end of basic block BB.
4366
4367 This is used by both the PRE and code hoisting.
4368
4369 For PRE, we want to verify that the expr is either transparent
4370 or locally anticipatable in the target block. This check makes
4371 no sense for code hoisting. */
4372
4373static void
4374insert_insn_end_bb (expr, bb, pre)
4375 struct expr *expr;
4376 int bb;
4377 int pre;
4378{
4379 rtx insn = BLOCK_END (bb);
4380 rtx new_insn;
4381 rtx reg = expr->reaching_reg;
4382 int regno = REGNO (reg);
4383 rtx pat;
4384
4385 pat = process_insert_insn (expr);
4386
4387 /* If the last insn is a jump, insert EXPR in front [taking care to
4388 handle cc0, etc. properly]. */
4389
4390 if (GET_CODE (insn) == JUMP_INSN)
4391 {
4392#ifdef HAVE_cc0
4393 rtx note;
4394#endif
4395
4396 /* If this is a jump table, then we can't insert stuff here. Since
4397 we know the previous real insn must be the tablejump, we insert
4398 the new instruction just before the tablejump. */
4399 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4400 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4401 insn = prev_real_insn (insn);
4402
4403#ifdef HAVE_cc0
4404 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4405 if cc0 isn't set. */
4406 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4407 if (note)
4408 insn = XEXP (note, 0);
4409 else
4410 {
4411 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4412 if (maybe_cc0_setter
4413 && GET_RTX_CLASS (GET_CODE (maybe_cc0_setter)) == 'i'
4414 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4415 insn = maybe_cc0_setter;
4416 }
4417#endif
4418 /* FIXME: What if something in cc0/jump uses value set in new insn? */
4419 new_insn = emit_insn_before (pat, insn);
4420 if (BLOCK_HEAD (bb) == insn)
4421 BLOCK_HEAD (bb) = new_insn;
4422 }
4423 /* Likewise if the last insn is a call, as will happen in the presence
4424 of exception handling. */
4425 else if (GET_CODE (insn) == CALL_INSN)
4426 {
4427 HARD_REG_SET parm_regs;
4428 int nparm_regs;
4429 rtx p;
4430
4431 /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4432 we search backward and place the instructions before the first
4433 parameter is loaded. Do this for everyone for consistency and a
4434 presumption that we'll get better code elsewhere as well. */
4435
4436 /* It should always be the case that we can put these instructions
4437 anywhere in the basic block when performing PRE optimizations.
4438 Check this. */
4439 if (pre
4440 && !TEST_BIT (antloc[bb], expr->bitmap_index)
4441 && !TEST_BIT (transp[bb], expr->bitmap_index))
4442 abort ();
4443
4444 /* Since different machines initialize their parameter registers
4445 in different orders, assume nothing. Collect the set of all
4446 parameter registers. */
4447 CLEAR_HARD_REG_SET (parm_regs);
4448 nparm_regs = 0;
4449 for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4450 if (GET_CODE (XEXP (p, 0)) == USE
4451 && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4452 {
4453 int regno = REGNO (XEXP (XEXP (p, 0), 0));
4454 if (regno >= FIRST_PSEUDO_REGISTER)
4455 abort ();
4456 SET_HARD_REG_BIT (parm_regs, regno);
4457 nparm_regs++;
4458 }
4459
4460 /* Search backward for the first set of a register in this set. */
4461 while (nparm_regs && BLOCK_HEAD (bb) != insn)
4462 {
4463 insn = PREV_INSN (insn);
4464 p = single_set (insn);
4465 if (p && GET_CODE (SET_DEST (p)) == REG
4466 && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4467 && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4468 {
4469 CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4470 nparm_regs--;
4471 }
4472 }
4473
4474 /* If we found all the parameter loads, then we want to insert
4475 before the first parameter load.
4476
4477 If we did not find all the parameter loads, then we might have
4478 stopped on the head of the block, which could be a CODE_LABEL.
4479 If we inserted before the CODE_LABEL, then we would be putting
4480 the insn in the wrong basic block. In that case, put the insn
4481 after the CODE_LABEL.
4482
4483 ?!? Do we need to account for NOTE_INSN_BASIC_BLOCK here? */
4484 if (GET_CODE (insn) != CODE_LABEL)
4485 {
4486 new_insn = emit_insn_before (pat, insn);
4487 if (BLOCK_HEAD (bb) == insn)
4488 BLOCK_HEAD (bb) = new_insn;
4489 }
4490 else
4491 {
4492 new_insn = emit_insn_after (pat, insn);
4493 }
4494 }
4495 else
4496 {
4497 new_insn = emit_insn_after (pat, insn);
4498 BLOCK_END (bb) = new_insn;
4499 }
4500
4501 /* Keep block number table up to date.
4502 Note, PAT could be a multiple insn sequence, we have to make
4503 sure that each insn in the sequence is handled. */
4504 if (GET_CODE (pat) == SEQUENCE)
4505 {
4506 int i;
4507
4508 for (i = 0; i < XVECLEN (pat, 0); i++)
4509 {
4510 rtx insn = XVECEXP (pat, 0, i);
4511 set_block_num (insn, bb);
4512 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
4513 add_label_notes (PATTERN (insn), new_insn);
4514 note_stores (PATTERN (insn), record_set_info, insn);
4515 }
4516 }
4517 else
4518 {
4519 add_label_notes (SET_SRC (pat), new_insn);
4520 set_block_num (new_insn, bb);
4521 /* Keep register set table up to date. */
4522 record_one_set (regno, new_insn);
4523 }
4524
4525 gcse_create_count++;
4526
4527 if (gcse_file)
4528 {
4529 fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, copying expression %d to reg %d\n",
4530 bb, INSN_UID (new_insn), expr->bitmap_index, regno);
4531 }
4532}
4533
4534/* Insert partially redundant expressions on edges in the CFG to make
4535 the expressions fully redundant. */
4536
4537static int
4538pre_edge_insert (edge_list, index_map)
4539 struct edge_list *edge_list;
7506f491
DE
4540 struct expr **index_map;
4541{
a42cd965 4542 int e, i, num_edges, set_size, did_insert = 0;
a65f3558
JL
4543 sbitmap *inserted;
4544
a42cd965
AM
4545 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
4546 if it reaches any of the deleted expressions. */
7506f491 4547
a42cd965
AM
4548 set_size = pre_insert_map[0]->size;
4549 num_edges = NUM_EDGES (edge_list);
4550 inserted = sbitmap_vector_alloc (num_edges, n_exprs);
4551 sbitmap_vector_zero (inserted, num_edges);
4552
4553 for (e = 0; e < num_edges; e++)
4554 {
4555 int indx;
4556 basic_block pred = INDEX_EDGE_PRED_BB (edge_list, e);
4557 int bb = pred->index;
4558
4559 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
4560 {
4561 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
4562 int j;
4563
4564 for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
4565 {
4566 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
4567 {
4568 struct expr *expr = index_map[j];
4569 struct occr *occr;
4570
4571 /* Now look at each deleted occurrence of this expression. */
4572 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4573 {
4574 if (! occr->deleted_p)
4575 continue;
4576
4577 /* Insert this expression on this edge if it would
4578 reach the deleted occurrence in BB. */
4579 if (!TEST_BIT (inserted[e], j))
4580 {
4581 rtx insn;
4582 edge eg = INDEX_EDGE (edge_list, e);
4583 /* We can't insert anything on an abnormal
4584 and critical edge, so we insert the
4585 insn at the end of the previous block. There
4586 are several alternatives detailed in
4587 Morgans book P277 (sec 10.5) for handling
4588 this situation. This one is easiest for now. */
4589
4590 if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
4591 {
4592 insert_insn_end_bb (index_map[j], bb, 0);
4593 }
4594 else
4595 {
4596 insn = process_insert_insn (index_map[j]);
4597 insert_insn_on_edge (insn, eg);
4598 }
4599 if (gcse_file)
4600 {
4601 fprintf (gcse_file,
4602 "PRE/HOIST: edge (%d,%d), copy expression %d\n",
4603 bb,
4604 INDEX_EDGE_SUCC_BB (edge_list, e)->index, expr->bitmap_index);
4605 }
4606 SET_BIT (inserted[e], j);
4607 did_insert = 1;
4608 gcse_create_count++;
4609 }
4610 }
4611 }
4612 }
4613 }
4614 }
4615
4616 /* Clean up. */
4617 free (inserted);
4618
4619 return did_insert;
4620}
4621
4622/* Copy the result of INSN to REG.
4623 INDX is the expression number. */
4624
4625static void
4626pre_insert_copy_insn (expr, insn)
4627 struct expr *expr;
4628 rtx insn;
4629{
4630 rtx reg = expr->reaching_reg;
4631 int regno = REGNO (reg);
4632 int indx = expr->bitmap_index;
4633 rtx set = single_set (insn);
4634 rtx new_insn;
4635 int bb = BLOCK_NUM (insn);
4636
4637 if (!set)
4638 abort ();
4639 new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4640 insn);
4641 /* Keep block number table up to date. */
4642 set_block_num (new_insn, bb);
4643 /* Keep register set table up to date. */
4644 record_one_set (regno, new_insn);
4645 if (insn == BLOCK_END (bb))
4646 BLOCK_END (bb) = new_insn;
4647
4648 gcse_create_count++;
4649
4650 if (gcse_file)
4651 fprintf (gcse_file,
4652 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
4653 BLOCK_NUM (insn), INSN_UID (new_insn), indx,
4654 INSN_UID (insn), regno);
4655}
4656
4657/* Copy available expressions that reach the redundant expression
4658 to `reaching_reg'. */
4659
4660static void
4661pre_insert_copies ()
4662{
4663 int i;
4664
4665 /* For each available expression in the table, copy the result to
4666 `reaching_reg' if the expression reaches a deleted one.
4667
4668 ??? The current algorithm is rather brute force.
4669 Need to do some profiling. */
4670
4671 for (i = 0; i < expr_hash_table_size; i++)
4672 {
4673 struct expr *expr;
4674
4675 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4676 {
4677 struct occr *occr;
4678
4679 /* If the basic block isn't reachable, PPOUT will be TRUE.
4680 However, we don't want to insert a copy here because the
4681 expression may not really be redundant. So only insert
4682 an insn if the expression was deleted.
4683 This test also avoids further processing if the expression
4684 wasn't deleted anywhere. */
4685 if (expr->reaching_reg == NULL)
4686 continue;
4687
4688 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4689 {
4690 struct occr *avail;
4691
4692 if (! occr->deleted_p)
4693 continue;
4694
4695 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4696 {
4697 rtx insn = avail->insn;
4698
4699 /* No need to handle this one if handled already. */
4700 if (avail->copied_p)
4701 continue;
4702 /* Don't handle this one if it's a redundant one. */
4703 if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
4704 continue;
4705 /* Or if the expression doesn't reach the deleted one. */
4706 if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr,
4707 BLOCK_NUM (occr->insn)))
4708 continue;
4709
4710 /* Copy the result of avail to reaching_reg. */
4711 pre_insert_copy_insn (expr, insn);
4712 avail->copied_p = 1;
4713 }
4714 }
4715 }
4716 }
4717}
4718
4719/* Delete redundant computations.
4720 Deletion is done by changing the insn to copy the `reaching_reg' of
4721 the expression into the result of the SET. It is left to later passes
4722 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4723
4724 Returns non-zero if a change is made. */
4725
4726static int
4727pre_delete ()
4728{
4729 int i, bb, changed;
4730
4731 /* Compute the expressions which are redundant and need to be replaced by
4732 copies from the reaching reg to the target reg. */
4733 for (bb = 0; bb < n_basic_blocks; bb++)
4734 sbitmap_copy (temp_bitmap[bb], pre_delete_map[bb]);
4735
4736 changed = 0;
4737 for (i = 0; i < expr_hash_table_size; i++)
4738 {
4739 struct expr *expr;
4740
4741 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4742 {
4743 struct occr *occr;
4744 int indx = expr->bitmap_index;
4745
4746 /* We only need to search antic_occr since we require
4747 ANTLOC != 0. */
4748
4749 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4750 {
4751 rtx insn = occr->insn;
4752 rtx set;
4753 int bb = BLOCK_NUM (insn);
4754
4755 if (TEST_BIT (temp_bitmap[bb], indx))
4756 {
4757 set = single_set (insn);
4758 if (! set)
4759 abort ();
4760
4761 /* Create a pseudo-reg to store the result of reaching
4762 expressions into. Get the mode for the new pseudo
4763 from the mode of the original destination pseudo. */
4764 if (expr->reaching_reg == NULL)
4765 expr->reaching_reg
4766 = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4767
4768 /* In theory this should never fail since we're creating
4769 a reg->reg copy.
4770
4771 However, on the x86 some of the movXX patterns actually
4772 contain clobbers of scratch regs. This may cause the
4773 insn created by validate_change to not match any pattern
4774 and thus cause validate_change to fail. */
4775 if (validate_change (insn, &SET_SRC (set),
4776 expr->reaching_reg, 0))
4777 {
4778 occr->deleted_p = 1;
4779 SET_BIT (pre_redundant_insns, INSN_CUID (insn));
4780 changed = 1;
4781 gcse_subst_count++;
4782 }
4783
4784 if (gcse_file)
4785 {
4786 fprintf (gcse_file,
4787 "PRE: redundant insn %d (expression %d) in bb %d, reaching reg is %d\n",
4788 INSN_UID (insn), indx, bb, REGNO (expr->reaching_reg));
4789 }
4790 }
4791 }
4792 }
4793 }
4794
4795 return changed;
4796}
4797
4798/* Perform GCSE optimizations using PRE.
4799 This is called by one_pre_gcse_pass after all the dataflow analysis
4800 has been done.
4801
4802 This is based on the original Morel-Renvoise paper, Fred Chow's thesis,
4803 and lazy code motion from Knoop, Ruthing and Steffen as described in
4804 Advanced Compiler Design and Implementation.
4805
4806 ??? A new pseudo reg is created to hold the reaching expression.
4807 The nice thing about the classical approach is that it would try to
4808 use an existing reg. If the register can't be adequately optimized
4809 [i.e. we introduce reload problems], one could add a pass here to
4810 propagate the new register through the block.
4811
4812 ??? We don't handle single sets in PARALLELs because we're [currently]
4813 not able to copy the rest of the parallel when we insert copies to create
4814 full redundancies from partial redundancies. However, there's no reason
4815 why we can't handle PARALLELs in the cases where there are no partial
4816 redundancies. */
4817
4818static int
4819pre_gcse ()
4820{
4821 int i, did_insert;
4822 int changed;
4823 struct expr **index_map;
4824
4825 /* Compute a mapping from expression number (`bitmap_index') to
4826 hash table entry. */
4827
4828 index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
4829 for (i = 0; i < expr_hash_table_size; i++)
4830 {
4831 struct expr *expr;
4832
4833 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4834 index_map[expr->bitmap_index] = expr;
4835 }
4836
4837 /* Reset bitmap used to track which insns are redundant. */
4838 pre_redundant_insns = sbitmap_alloc (max_cuid);
4839 sbitmap_zero (pre_redundant_insns);
4840
4841 /* Delete the redundant insns first so that
4842 - we know what register to use for the new insns and for the other
4843 ones with reaching expressions
4844 - we know which insns are redundant when we go to create copies */
4845 changed = pre_delete ();
4846
4847 did_insert = pre_edge_insert (edge_list, index_map);
4848 /* In other places with reaching expressions, copy the expression to the
4849 specially allocated pseudo-reg that reaches the redundant expr. */
4850 pre_insert_copies ();
4851 if (did_insert)
4852 {
4853 commit_edge_insertions ();
4854 changed = 1;
4855 }
4856
4857 free (index_map);
4858 free (pre_redundant_insns);
4859
4860 return changed;
4861}
4862
4863/* Top level routine to perform one PRE GCSE pass.
4864
4865 Return non-zero if a change was made. */
4866
4867static int
4868one_pre_gcse_pass (pass)
4869 int pass;
4870{
4871 int changed = 0;
4872
4873 gcse_subst_count = 0;
4874 gcse_create_count = 0;
4875
4876 alloc_expr_hash_table (max_cuid);
4877 add_noreturn_fake_exit_edges ();
4878 compute_expr_hash_table ();
4879 if (gcse_file)
4880 dump_hash_table (gcse_file, "Expression", expr_hash_table,
4881 expr_hash_table_size, n_exprs);
4882 if (n_exprs > 0)
4883 {
4884 alloc_pre_mem (n_basic_blocks, n_exprs);
4885 compute_pre_data ();
4886 changed |= pre_gcse ();
4887 free_edge_list (edge_list);
4888 free_pre_mem ();
4889 }
4890 remove_fake_edges ();
4891 free_expr_hash_table ();
4892
4893 if (gcse_file)
4894 {
4895 fprintf (gcse_file, "\n");
4896 fprintf (gcse_file, "PRE GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
4897 current_function_name, pass,
4898 bytes_used, gcse_subst_count, gcse_create_count);
4899 }
4900
4901 return changed;
4902}
4903\f
4904/* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4905 We have to add REG_LABEL notes, because the following loop optimization
4906 pass requires them. */
4907
4908/* ??? This is very similar to the loop.c add_label_notes function. We
4909 could probably share code here. */
4910
4911/* ??? If there was a jump optimization pass after gcse and before loop,
4912 then we would not need to do this here, because jump would add the
4913 necessary REG_LABEL notes. */
4914
4915static void
4916add_label_notes (x, insn)
4917 rtx x;
4918 rtx insn;
4919{
4920 enum rtx_code code = GET_CODE (x);
4921 int i, j;
4922 const char *fmt;
4923
4924 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4925 {
4926 /* This code used to ignore labels that referred to dispatch tables to
4927 avoid flow generating (slightly) worse code.
4928
4929 We no longer ignore such label references (see LABEL_REF handling in
4930 mark_jump_label for additional information). */
4931 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
4932 REG_NOTES (insn));
4933 return;
4934 }
4935
4936 fmt = GET_RTX_FORMAT (code);
4937 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4938 {
4939 if (fmt[i] == 'e')
4940 add_label_notes (XEXP (x, i), insn);
4941 else if (fmt[i] == 'E')
4942 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4943 add_label_notes (XVECEXP (x, i, j), insn);
4944 }
4945}
4946
4947/* Compute transparent outgoing information for each block.
4948
4949 An expression is transparent to an edge unless it is killed by
4950 the edge itself. This can only happen with abnormal control flow,
4951 when the edge is traversed through a call. This happens with
4952 non-local labels and exceptions.
4953
4954 This would not be necessary if we split the edge. While this is
4955 normally impossible for abnormal critical edges, with some effort
4956 it should be possible with exception handling, since we still have
4957 control over which handler should be invoked. But due to increased
4958 EH table sizes, this may not be worthwhile. */
4959
4960static void
4961compute_transpout ()
4962{
4963 int bb;
4964
4965 sbitmap_vector_ones (transpout, n_basic_blocks);
4966
4967 for (bb = 0; bb < n_basic_blocks; ++bb)
4968 {
4969 int i;
4970
4971 /* Note that flow inserted a nop at the end of basic blocks that
4972 end in call instructions for reasons other than abnormal
4973 control flow. */
4974 if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
4975 continue;
4976
4977 for (i = 0; i < expr_hash_table_size; i++)
4978 {
4979 struct expr *expr;
4980 for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4981 if (GET_CODE (expr->expr) == MEM)
4982 {
4983 rtx addr = XEXP (expr->expr, 0);
4984
4985 if (GET_CODE (addr) == SYMBOL_REF
4986 && CONSTANT_POOL_ADDRESS_P (addr))
4987 continue;
4988
4989 /* ??? Optimally, we would use interprocedural alias
4990 analysis to determine if this mem is actually killed
4991 by this call. */
4992 RESET_BIT (transpout[bb], expr->bitmap_index);
4993 }
4994 }
4995 }
4996}
4997
4998/* Removal of useless null pointer checks */
4999
5000/* Called via note_stores. X is set by SETTER. If X is a register we must
5001 invalidate nonnull_local and set nonnull_killed. DATA is really a
5002 `null_pointer_info *'.
5003
5004 We ignore hard registers. */
5005static void
5006invalidate_nonnull_info (x, setter, data)
5007 rtx x;
5008 rtx setter ATTRIBUTE_UNUSED;
5009 void *data;
5010{
5011 int offset, regno;
5012 struct null_pointer_info* npi = (struct null_pointer_info *) data;
5013
5014 offset = 0;
5015 while (GET_CODE (x) == SUBREG)
5016 x = SUBREG_REG (x);
5017
5018 /* Ignore anything that is not a register or is a hard register. */
5019 if (GET_CODE (x) != REG
5020 || REGNO (x) < npi->min_reg
5021 || REGNO (x) >= npi->max_reg)
5022 return;
5023
5024 regno = REGNO (x) - npi->min_reg;
5025
5026 RESET_BIT (npi->nonnull_local[npi->current_block], regno);
5027 SET_BIT (npi->nonnull_killed[npi->current_block], regno);
5028}
5029
5030/* Do null-pointer check elimination for the registers indicated in
5031 NPI. NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
5032 they are not our responsibility to free. */
5033
5034static void
5035delete_null_pointer_checks_1 (block_reg, nonnull_avin, nonnull_avout, npi)
5036 int *block_reg;
5037 sbitmap *nonnull_avin;
5038 sbitmap *nonnull_avout;
5039 struct null_pointer_info *npi;
5040{
5041 int bb;
0511851c
MM
5042 int current_block;
5043 sbitmap *nonnull_local = npi->nonnull_local;
5044 sbitmap *nonnull_killed = npi->nonnull_killed;
5045
5046 /* Compute local properties, nonnull and killed. A register will have
5047 the nonnull property if at the end of the current block its value is
5048 known to be nonnull. The killed property indicates that somewhere in
5049 the block any information we had about the register is killed.
5050
5051 Note that a register can have both properties in a single block. That
5052 indicates that it's killed, then later in the block a new value is
5053 computed. */
5054 sbitmap_vector_zero (nonnull_local, n_basic_blocks);
5055 sbitmap_vector_zero (nonnull_killed, n_basic_blocks);
5056 for (current_block = 0; current_block < n_basic_blocks; current_block++)
5057 {
5058 rtx insn, stop_insn;
5059
5060 /* Set the current block for invalidate_nonnull_info. */
5061 npi->current_block = current_block;
5062
5063 /* Scan each insn in the basic block looking for memory references and
5064 register sets. */
5065 stop_insn = NEXT_INSN (BLOCK_END (current_block));
5066 for (insn = BLOCK_HEAD (current_block);
5067 insn != stop_insn;
5068 insn = NEXT_INSN (insn))
5069 {
5070 rtx set;
5071 rtx reg;
5072
5073 /* Ignore anything that is not a normal insn. */
5074 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
5075 continue;
5076
5077 /* Basically ignore anything that is not a simple SET. We do have
5078 to make sure to invalidate nonnull_local and set nonnull_killed
5079 for such insns though. */
5080 set = single_set (insn);
5081 if (!set)
5082 {
5083 note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5084 continue;
5085 }
5086
5087 /* See if we've got a usable memory load. We handle it first
5088 in case it uses its address register as a dest (which kills
5089 the nonnull property). */
5090 if (GET_CODE (SET_SRC (set)) == MEM
5091 && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
5092 && REGNO (reg) >= npi->min_reg
5093 && REGNO (reg) < npi->max_reg)
5094 SET_BIT (nonnull_local[current_block],
5095 REGNO (reg) - npi->min_reg);
5096
5097 /* Now invalidate stuff clobbered by this insn. */
5098 note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
5099
5100 /* And handle stores, we do these last since any sets in INSN can
5101 not kill the nonnull property if it is derived from a MEM
5102 appearing in a SET_DEST. */
5103 if (GET_CODE (SET_DEST (set)) == MEM
5104 && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
5105 && REGNO (reg) >= npi->min_reg
5106 && REGNO (reg) < npi->max_reg)
5107 SET_BIT (nonnull_local[current_block],
5108 REGNO (reg) - npi->min_reg);
5109 }
5110 }
5111
5112 /* Now compute global properties based on the local properties. This
5113 is a classic global availability algorithm. */
5114 compute_available (nonnull_local, nonnull_killed,
5115 nonnull_avout, nonnull_avin);
5116
5117 /* Now look at each bb and see if it ends with a compare of a value
5118 against zero. */
5119 for (bb = 0; bb < n_basic_blocks; bb++)
5120 {
5121 rtx last_insn = BLOCK_END (bb);
5122 rtx condition, earliest;
5123 int compare_and_branch;
5124
5125 /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
5126 since BLOCK_REG[BB] is zero if this block did not end with a
5127 comparison against zero, this condition works. */
5128 if (block_reg[bb] < npi->min_reg
5129 || block_reg[bb] >= npi->max_reg)
5130 continue;
5131
5132 /* LAST_INSN is a conditional jump. Get its condition. */
5133 condition = get_condition (last_insn, &earliest);
5134
5135 /* If we can't determine the condition then skip. */
5136 if (! condition)
5137 continue;
5138
5139 /* Is the register known to have a nonzero value? */
5140 if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
5141 continue;
5142
5143 /* Try to compute whether the compare/branch at the loop end is one or
5144 two instructions. */
5145 if (earliest == last_insn)
5146 compare_and_branch = 1;
5147 else if (earliest == prev_nonnote_insn (last_insn))
5148 compare_and_branch = 2;
5149 else
5150 continue;
5151
5152 /* We know the register in this comparison is nonnull at exit from
5153 this block. We can optimize this comparison. */
5154 if (GET_CODE (condition) == NE)
5155 {
5156 rtx new_jump;
5157
5158 new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
5159 last_insn);
5160 JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
5161 LABEL_NUSES (JUMP_LABEL (new_jump))++;
5162 emit_barrier_after (new_jump);
5163 }
5164 delete_insn (last_insn);
5165 if (compare_and_branch == 2)
5166 delete_insn (earliest);
5167
5168 /* Don't check this block again. (Note that BLOCK_END is
5169 invalid here; we deleted the last instruction in the
5170 block.) */
5171 block_reg[bb] = 0;
5172 }
5173}
5174
5175/* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
5176 at compile time.
5177
5178 This is conceptually similar to global constant/copy propagation and
5179 classic global CSE (it even uses the same dataflow equations as cprop).
5180
5181 If a register is used as memory address with the form (mem (reg)), then we
5182 know that REG can not be zero at that point in the program. Any instruction
5183 which sets REG "kills" this property.
5184
5185 So, if every path leading to a conditional branch has an available memory
5186 reference of that form, then we know the register can not have the value
5187 zero at the conditional branch.
5188
5189 So we merely need to compute the local properties and propagate that data
5190 around the cfg, then optimize where possible.
5191
5192 We run this pass two times. Once before CSE, then again after CSE. This
5193 has proven to be the most profitable approach. It is rare for new
5194 optimization opportunities of this nature to appear after the first CSE
5195 pass.
5196
5197 This could probably be integrated with global cprop with a little work. */
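/* Editorial source-level sketch (mine, not from the sources):

     int f (int *p)
     {
       int x = *p;          -- the load (mem (reg P)) makes P nonnull here
       if (p != 0)          -- NE test against zero, now provably true
         return x;
       return -1;
     }

   Every path to the test contains the dereference, so the pass replaces
   the conditional NE branch with an unconditional jump and lets later
   passes delete the dead arm.  */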
5198
5199void
5200delete_null_pointer_checks (f)
5201 rtx f;
5202{
5203 sbitmap *nonnull_avin, *nonnull_avout;
5204 int *block_reg;
5205 int bb;
5206 int reg;
5207 int regs_per_pass;
5208 int max_reg;
5209 struct null_pointer_info npi;
5210
5211 /* First break the program into basic blocks. */
5212 find_basic_blocks (f, max_reg_num (), NULL, 1);
5213
5214 /* If we have only a single block, then there's nothing to do. */
5215 if (n_basic_blocks <= 1)
5216 {
5217 /* Free storage allocated by find_basic_blocks. */
5218 free_basic_block_vars (0);
5219 return;
5220 }
5221
5222 /* Trying to perform global optimizations on flow graphs which have
5223 a high connectivity will take a long time and is unlikely to be
5224 particularly useful.
5225
5226 In normal circumstances a cfg should have about twice as many edges
5227 as blocks. But we do not want to punish small functions which have
5228 a couple switch statements. So we require a relatively large number
5229 of basic blocks and the ratio of edges to blocks to be high. */
5230 if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
5231 {
5232 /* Free storage allocated by find_basic_blocks. */
5233 free_basic_block_vars (0);
5234 return;
5235 }
5236
0511851c
MM
5237 /* We need four bitmaps, each with a bit for each register in each
5238 basic block. */
5239 max_reg = max_reg_num ();
5240 regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);
5241
5242 /* Allocate bitmaps to hold local and global properties. */
5243 npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5244 npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5245 nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5246 nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
5247
5248 /* Go through the basic blocks, seeing whether or not each block
5249 ends with a conditional branch whose condition is a comparison
5250 against zero. Record the register compared in BLOCK_REG. */
5251 block_reg = (int *) xcalloc (n_basic_blocks, sizeof (int));
5252 for (bb = 0; bb < n_basic_blocks; bb++)
5253 {
5254 rtx last_insn = BLOCK_END (bb);
5255 rtx condition, earliest, reg;
5256
5257 /* We only want conditional branches. */
5258 if (GET_CODE (last_insn) != JUMP_INSN
5259 || !condjump_p (last_insn)
5260 || simplejump_p (last_insn))
5261 continue;
5262
5263 /* LAST_INSN is a conditional jump. Get its condition. */
5264 condition = get_condition (last_insn, &earliest);
5265
5266 /* If we were unable to get the condition, or it is not an equality
5267 comparison against zero then there's nothing we can do. */
5268 if (!condition
5269 || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
5270 || GET_CODE (XEXP (condition, 1)) != CONST_INT
5271 || (XEXP (condition, 1)
5272 != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
5273 continue;
5274
5275 /* We must be checking a register against zero. */
5276 reg = XEXP (condition, 0);
5277 if (GET_CODE (reg) != REG)
5278 continue;
5279
5280 block_reg[bb] = REGNO (reg);
5281 }
5282
5283 /* Go through the algorithm for each block of registers. */
5284 for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
5285 {
5286 npi.min_reg = reg;
5287 npi.max_reg = MIN (reg + regs_per_pass, max_reg);
5288 delete_null_pointer_checks_1 (block_reg, nonnull_avin,
5289 nonnull_avout, &npi);
5290 }
5291
5292 /* Free storage allocated by find_basic_blocks. */
5293 free_basic_block_vars (0);
5294
5295 /* Free the table of registers compared at the end of every block. */
5296 free (block_reg);
5297
5298 /* Free bitmaps. */
5299 free (npi.nonnull_local);
5300 free (npi.nonnull_killed);
5301 free (nonnull_avin);
5302 free (nonnull_avout);
5303}
5304
5305/* Code Hoisting variables and subroutines. */
5306
5307/* Very busy expressions. */
5308static sbitmap *hoist_vbein;
5309static sbitmap *hoist_vbeout;
5310
5311/* Hoistable expressions. */
5312static sbitmap *hoist_exprs;
5313
5314/* Dominator bitmaps. */
5315static sbitmap *dominators;
5316
5317/* ??? We could compute post dominators and run this algorithm in
5318 reverse to perform tail merging; doing so would probably be
5319 more effective than the tail merging code in jump.c.
5320
5321 It's unclear if tail merging could be run in parallel with
5322 code hoisting. It would be nice. */
5323
5324/* Allocate vars used for code hoisting analysis. */
5325
5326static void
5327alloc_code_hoist_mem (n_blocks, n_exprs)
5328 int n_blocks, n_exprs;
5329{
5330 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5331 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
5332 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
5333
5334 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
5335 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
5336 hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
5337 transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
5338
5339 dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
5340}
5341
5342/* Free vars used for code hoisting analysis. */
5343
5344static void
5345free_code_hoist_mem ()
5346{
5347 free (antloc);
5348 free (transp);
5349 free (comp);
5350
5351 free (hoist_vbein);
5352 free (hoist_vbeout);
5353 free (hoist_exprs);
5354 free (transpout);
5355
5356 free (dominators);
5357}
5358
5359/* Compute the very busy expressions at entry/exit from each block.
5360
5361 An expression is very busy if all paths from a given point
5362 compute the expression. */
5363
5364static void
5365compute_code_hoist_vbeinout ()
5366{
5367 int bb, changed, passes;
5368
5369 sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
5370 sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
5371
5372 passes = 0;
5373 changed = 1;
5374 while (changed)
5375 {
5376 changed = 0;
5377 /* We scan the blocks in the reverse order to speed up
5378 the convergence. */
5379 for (bb = n_basic_blocks - 1; bb >= 0; bb--)
5380 {
5381 changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
5382 hoist_vbeout[bb], transp[bb]);
5383 if (bb != n_basic_blocks - 1)
5384 sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
5385 }
5386 passes++;
5387 }
5388
5389 if (gcse_file)
5390 fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
5391}
5392
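/* Editorial note (mine, not from the sources): the fixed point computed
   above is the standard very-busy-expression system,

     VBEIN(b)  = ANTLOC(b) | (VBEOUT(b) & TRANSP(b))
     VBEOUT(b) = intersection of VBEIN(s) over all successors s of b
                 (empty for the last block)

   solved by sweeping the blocks in reverse order until nothing changes.  */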
5393/* Top level routine to do the dataflow analysis needed by code hoisting. */
5394
5395static void
5396compute_code_hoist_data ()
5397{
5398 compute_local_properties (transp, comp, antloc, 0);
5399 compute_transpout ();
5400 compute_code_hoist_vbeinout ();
5401 compute_flow_dominators (dominators, NULL);
5402 if (gcse_file)
5403 fprintf (gcse_file, "\n");
5404}
5405
5406/* Determine if the expression identified by EXPR_INDEX would
5407 reach BB unimpaired if it was placed at the end of EXPR_BB.
5408
5409 It's unclear exactly what Muchnick meant by "unimpaired". It seems
5410 to me that the expression must either be computed or transparent in
5411 *every* block in the path(s) from EXPR_BB to BB. Any other definition
5412 would allow the expression to be hoisted out of loops, even if
5413 the expression wasn't a loop invariant.
5414
5415 Contrast this to reachability for PRE where an expression is
5416 considered reachable if *any* path reaches instead of *all*
5417 paths. */
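
/* For example, in the diamond

	 EXPR_BB
	  /   \
	B1     B2
	  \   /
	   BB

   where B1 is transparent for the expression but B2 kills it, PRE's
   *any*-path notion would let the value reach BB via B1, while for
   hoisting we must give up: along the path through B2, a computation
   placed at the end of EXPR_BB would no longer be valid at BB.  */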

static int
hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
     int expr_bb;
     int expr_index;
     int bb;
     char *visited;
{
  edge pred;
  int visited_allocated_locally = 0;

  if (visited == NULL)
    {
      visited_allocated_locally = 1;
      visited = xcalloc (n_basic_blocks, 1);
    }

  visited[expr_bb] = 1;
  for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
    {
      int pred_bb = pred->src->index;

      if (pred->src == ENTRY_BLOCK_PTR)
	break;
      else if (visited[pred_bb])
	continue;
      /* Does this predecessor generate this expression?  */
      else if (TEST_BIT (comp[pred_bb], expr_index))
	break;
      else if (! TEST_BIT (transp[pred_bb], expr_index))
	break;
      /* Not killed.  */
      else
	{
	  visited[pred_bb] = 1;
	  if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
					   pred_bb, visited))
	    break;
	}
    }
  if (visited_allocated_locally)
    free (visited);

  return (pred == NULL);
}
\f
/* Actually perform code hoisting.  */

static void
hoist_code ()
{
  int bb, dominated, i;
  struct expr **index_map;

  sbitmap_vector_zero (hoist_exprs, n_basic_blocks);

  /* Compute a mapping from expression number (`bitmap_index') to
     hash table entry.  */

  index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
  for (i = 0; i < expr_hash_table_size; i++)
    {
      struct expr *expr;

      for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
	index_map[expr->bitmap_index] = expr;
    }
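
  /* From here on, bit I in any of the expression sbitmaps maps back to
     its hash table entry (and so to its rtl) as index_map[I].  */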

  /* Walk over each basic block looking for potentially hoistable
     expressions; nothing is hoisted from the entry block.  */
  for (bb = 0; bb < n_basic_blocks; bb++)
    {
      int found = 0;
      int insn_inserted_p;

      /* Examine each expression that is very busy at the exit of this
	 block.  These are the potentially hoistable expressions.  */
      for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
	{
	  int hoistable = 0;

	  if (TEST_BIT (hoist_vbeout[bb], i)
	      && TEST_BIT (transpout[bb], i))
	    {
	      /* We've found a potentially hoistable expression, now
		 we look at every block BB dominates to see if it
		 computes the expression.  */
	      for (dominated = 0; dominated < n_basic_blocks; dominated++)
		{
		  /* Ignore self dominance.  */
		  if (bb == dominated
		      || ! TEST_BIT (dominators[dominated], bb))
		    continue;

		  /* We've found a dominated block, now see if it computes
		     the busy expression and whether or not moving that
		     expression to the "beginning" of that block is safe.  */
		  if (! TEST_BIT (antloc[dominated], i))
		    continue;

		  /* Note if the expression would reach the dominated block
		     unimpaired if it were placed at the end of BB.

		     Keep track of how many times this expression is hoistable
		     from a dominated block into BB.  */
		  if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
		    hoistable++;
		}

	      /* If we found more than one hoistable occurrence of this
		 expression, then note it in the bitmap of expressions to
		 hoist.  It makes no sense to hoist things which are computed
		 in only one BB, and doing so tends to pessimize register
		 allocation.  One could increase this value to try harder
		 to avoid any possible code expansion due to register
		 allocation issues; however, experiments have shown that
		 the vast majority of hoistable expressions are only movable
		 from two successors, so raising this threshold is likely
		 to nullify any benefit we get from code hoisting.  */
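
	      /* For example, if both arms of a diamond below BB compute
		 X+Y, hoisting replaces the two evaluations with a single
		 one at the end of BB.  If only one arm computes X+Y,
		 "hoisting" would merely move that lone evaluation and
		 lengthen the live range of its result for no saving.  */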
	      if (hoistable > 1)
		{
		  SET_BIT (hoist_exprs[bb], i);
		  found = 1;
		}
	    }
	}

      /* If we found nothing to hoist, then quit now.  */
      if (! found)
	continue;

      /* Loop over all the hoistable expressions.  */
      for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
	{
	  /* We want to insert the expression into BB only once, so
	     note when we've inserted it.  */
	  insn_inserted_p = 0;

	  /* These tests should be the same as the tests above.  */
	  if (TEST_BIT (hoist_vbeout[bb], i))
	    {
	      /* We've found a potentially hoistable expression, now
		 we look at every block BB dominates to see if it
		 computes the expression.  */
	      for (dominated = 0; dominated < n_basic_blocks; dominated++)
		{
		  /* Ignore self dominance.  */
		  if (bb == dominated
		      || ! TEST_BIT (dominators[dominated], bb))
		    continue;

		  /* We've found a dominated block, now see if it computes
		     the busy expression and whether or not moving that
		     expression to the "beginning" of that block is safe.  */
		  if (! TEST_BIT (antloc[dominated], i))
		    continue;

		  /* The expression is computed in the dominated block and
		     it would be safe to compute it at the start of the
		     dominated block.  Now we have to determine if the
		     expression would reach the dominated block if it were
		     placed at the end of BB.  */
		  if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
		    {
		      struct expr *expr = index_map[i];
		      struct occr *occr = expr->antic_occr;
		      rtx insn;
		      rtx set;

		      /* Find the right occurrence of this expression.  */
		      while (occr && BLOCK_NUM (occr->insn) != dominated)
			occr = occr->next;

		      /* Should never happen.  */
		      if (! occr)
			abort ();

		      insn = occr->insn;

		      set = single_set (insn);
		      if (! set)
			abort ();

		      /* Create a pseudo-reg to store the result of reaching
			 expressions into.  Get the mode for the new pseudo
			 from the mode of the original destination pseudo.  */
		      if (expr->reaching_reg == NULL)
			expr->reaching_reg
			  = gen_reg_rtx (GET_MODE (SET_DEST (set)));

		      /* In theory this should never fail since we're creating
			 a reg->reg copy.

			 However, on the x86 some of the movXX patterns
			 actually contain clobbers of scratch regs.  This may
			 cause the insn created by validate_change to not
			 match any pattern and thus cause validate_change to
			 fail.  */
		      if (validate_change (insn, &SET_SRC (set),
					   expr->reaching_reg, 0))
			{
			  occr->deleted_p = 1;
			  if (! insn_inserted_p)
			    {
			      insert_insn_end_bb (index_map[i], bb, 0);
			      insn_inserted_p = 1;
			    }
			}
		    }
		}
	    }
	}
    }
  free (index_map);
}

/* Top level routine to perform one code hoisting (aka unification) pass.

   Return non-zero if a change was made.  Note that hoist_code does not
   currently report back whether it changed anything, so CHANGED stays
   zero and this always returns zero as written.  */

static int
one_code_hoisting_pass ()
{
  int changed = 0;

  alloc_expr_hash_table (max_cuid);
  compute_expr_hash_table ();
  if (gcse_file)
    dump_hash_table (gcse_file, "Code Hoisting Expressions", expr_hash_table,
		     expr_hash_table_size, n_exprs);
  if (n_exprs > 0)
    {
      alloc_code_hoist_mem (n_basic_blocks, n_exprs);
      compute_code_hoist_data ();
      hoist_code ();
      free_code_hoist_mem ();
    }
  free_expr_hash_table ();

  return changed;
}
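
/* A sketch of the intended use from this file's driver (illustrative
   only; the exact gating in gcse_main may differ).  Code hoisting
   mainly shrinks code rather than speeding it up, so it is only
   worth running when optimizing for size:

     if (optimize_size)
       changed |= one_code_hoisting_pass ();
*/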