/* Partial redundancy elimination / Hoisting for RTL.
   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
   2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* TODO
   - reordering of memory allocation and freeing to be more space efficient
   - do rough calc of how many regs are needed in each block, and a rough
     calc of how many regs are available in each class and use that to
     throttle back the code in cases where RTX_COST is minimal.
*/

/* References searched while implementing this.

   Compilers: Principles, Techniques, and Tools
   Aho, Sethi, Ullman
   Addison-Wesley, 1988

   Global Optimization by Suppression of Partial Redundancies
   E. Morel, C. Renvoise
   Communications of the ACM, Vol. 22, Num. 2, Feb. 1979

   A Portable Machine-Independent Global Optimizer - Design and Measurements
   Frederick Chow
   Stanford Ph.D. thesis, Dec. 1983

   A Fast Algorithm for Code Movement Optimization
   D.M. Dhamdhere
   SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988

   A Solution to a Problem with Morel and Renvoise's
   Global Optimization by Suppression of Partial Redundancies
   K-H Drechsler, M.P. Stadel
   ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988

   Practical Adaptation of the Global Optimization
   Algorithm of Morel and Renvoise
   D.M. Dhamdhere
   ACM TOPLAS, Vol. 13, Num. 2, Apr. 1991

   Efficiently Computing Static Single Assignment Form and the Control
   Dependence Graph
   R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
   ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991

   Lazy Code Motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
   Time for Reducible Flow Control
   Thomas Ball
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   An Efficient Representation for Sparse Sets
   Preston Briggs, Linda Torczon
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
   K-H Drechsler, M.P. Stadel
   ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993

   Partial Dead Code Elimination
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Effective Partial Redundancy Elimination
   P. Briggs, K.D. Cooper
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   The Program Structure Tree: Computing Control Regions in Linear Time
   R. Johnson, D. Pearson, K. Pingali
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Optimal Code Motion: Theory and Practice
   J. Knoop, O. Ruthing, B. Steffen
   ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994

   The Power of Assignment Motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Global Code Motion / Global Value Numbering
   C. Click
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Value Driven Redundancy Elimination
   L.T. Simpson
   Rice University Ph.D. thesis, Apr. 1996

   Value Numbering
   L.T. Simpson
   Massively Scalar Compiler Project, Rice University, Sep. 1996

   High Performance Compilers for Parallel Computing
   Michael Wolfe
   Addison-Wesley, 1996

   Advanced Compiler Design and Implementation
   Steven Muchnick
   Morgan Kaufmann, 1997

   Building an Optimizing Compiler
   Robert Morgan
   Digital Press, 1998

   People wishing to speed up the code here should read:
     Elimination Algorithms for Data Flow Analysis
     B.G. Ryder, M.C. Paull
     ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986

     How to Analyze Large Programs Efficiently and Informatively
     D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
     ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   People wishing to do something different can find various possibilities
   in the above papers and elsewhere.
*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "diagnostic-core.h"
#include "toplev.h"

#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "flags.h"
#include "insn-config.h"
#include "recog.h"
#include "basic-block.h"
#include "function.h"
#include "expr.h"
#include "except.h"
#include "ggc.h"
#include "params.h"
#include "cselib.h"
#include "intl.h"
#include "obstack.h"
#include "tree-pass.h"
#include "hashtab.h"
#include "df.h"
#include "dbgcnt.h"
#include "target.h"
#include "gcse.h"

/* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
   are a superset of those done by classic GCSE.

   Two passes of copy/constant propagation are done around PRE or hoisting
   because the first one enables more GCSE and the second one helps to clean
   up the copies that PRE and HOIST create.  This is needed more for PRE than
   for HOIST because code hoisting will try to use an existing register
   containing the common subexpression rather than create a new one.  This is
   harder to do for PRE because of the code motion (which HOIST doesn't do).

   Expressions we are interested in GCSE-ing are of the form
   (set (pseudo-reg) (expression)).
   Function want_to_gcse_p says what these are.

   In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
   This allows PRE to hoist expressions that are expressed in multiple insns,
   such as complex address calculations (e.g. for PIC code, or loads with a
   high part and a low part).

   PRE handles moving invariant expressions out of loops (by treating them as
   partially redundant).

   **********************

   We used to support multiple passes but there are diminishing returns in
   doing so.  The first pass usually makes 90% of the changes that are doable.
   A second pass can make a few more changes made possible by the first pass.
   Experiments show any further passes don't make enough changes to justify
   the expense.

   A study of spec92 using an unlimited number of passes:
   [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
   [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
   [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1

   It was found that doing copy propagation between each pass enables further
   substitutions.

   This study was done before expressions in REG_EQUAL notes were added as
   candidate expressions for optimization, and before the GIMPLE optimizers
   were added.  Probably, multiple passes are even less efficient now than
   at the time when the study was conducted.

   PRE is quite expensive in complicated functions because the DFA can take
   a while to converge.  Hence we only perform one pass.

   **********************

   The steps for PRE are:

   1) Build the hash table of expressions we wish to GCSE (expr_hash_table).

   2) Perform the data flow analysis for PRE.

   3) Delete the redundant instructions.

   4) Insert the required copies [if any] that make the partially
      redundant instructions fully redundant.

   5) For other reaching expressions, insert an instruction to copy the value
      to a newly created pseudo that will reach the redundant instruction.

   The deletion is done first so that when we do insertions we
   know which pseudo reg to use.

   Various papers have argued that PRE DFA is expensive (O(n^2)) and others
   argue it is not.  The number of iterations for the algorithm to converge
   is typically 2-4 so I don't view it as that expensive (relatively speaking).

   PRE GCSE depends heavily on the second CPROP pass to clean up the copies
   we create.  To make an expression reach the place where it's redundant,
   the result of the expression is copied to a new register, and the redundant
   expression is deleted by replacing it with this new register.  Classic GCSE
   doesn't have this problem as much, as it computes the reaching defs of
   each register in each block and thus can try to use an existing
   register.  */
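
/* For illustration only (a sketch, not part of the original comments):
   on a diamond-shaped CFG the expression "a + b" below is partially
   redundant.  The steps above make it fully redundant by inserting the
   expression on the path that lacks it and copying its value to a new
   pseudo T:

       if (cond)                     if (cond)
         x = a + b;                    { T = a + b; x = T; }
       else                   ==>    else
         x = 0;                        { T = a + b; x = 0; }
       y = a + b;                    y = T;

   The now fully redundant computation reaching Y is deleted (step 3)
   and replaced by T, the reaching_reg recorded in struct expr below.  */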
\f
/* GCSE global vars.  */

struct target_gcse default_target_gcse;
#if SWITCHABLE_TARGET
struct target_gcse *this_target_gcse = &default_target_gcse;
#endif

/* Set to nonzero if CSE should run after all GCSE optimizations are done.  */
int flag_rerun_cse_after_global_opts;

/* An obstack for our working variables.  */
static struct obstack gcse_obstack;

struct reg_use { rtx reg_rtx; };

/* Hash table of expressions.  */

struct expr
{
  /* The expression.  */
  rtx expr;
  /* Index in the available expression bitmaps.  */
  int bitmap_index;
  /* Next entry with the same hash.  */
  struct expr *next_same_hash;
  /* List of anticipatable occurrences in basic blocks in the function.
     An "anticipatable occurrence" is one that is the first occurrence in the
     basic block, the operands are not modified in the basic block prior
     to the occurrence and the output is not used between the start of
     the block and the occurrence.  */
  struct occr *antic_occr;
  /* List of available occurrences in basic blocks in the function.
     An "available occurrence" is one that is the last occurrence in the
     basic block and the operands are not modified by following statements in
     the basic block [including this insn].  */
  struct occr *avail_occr;
  /* Non-null if the computation is PRE redundant.
     The value is the newly created pseudo-reg to record a copy of the
     expression in all the places that reach the redundant copy.  */
  rtx reaching_reg;
  /* Maximum distance in instructions this expression can travel.
     We avoid moving simple expressions for more than a few instructions
     to keep register pressure under control.
     A value of "0" removes restrictions on how far the expression can
     travel.  */
  int max_distance;
};

/* Occurrence of an expression.
   There is one per basic block.  If a pattern appears more than once the
   last appearance is used [or first for anticipatable expressions].  */

struct occr
{
  /* Next occurrence of this expression.  */
  struct occr *next;
  /* The insn that computes the expression.  */
  rtx insn;
  /* Nonzero if this [anticipatable] occurrence has been deleted.  */
  char deleted_p;
  /* Nonzero if this [available] occurrence has been copied to
     reaching_reg.  */
  /* ??? This is mutually exclusive with deleted_p, so they could share
     the same byte.  */
  char copied_p;
};

typedef struct occr *occr_t;
DEF_VEC_P (occr_t);
DEF_VEC_ALLOC_P (occr_t, heap);

/* Expression hash tables.
   Each hash table is an array of buckets.
   ??? It is known that if it were an array of entries, structure elements
   `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
   not clear whether in the final analysis a sufficient amount of memory would
   be saved as the size of the available expression bitmaps would be larger
   [one could build a mapping table without holes afterwards though].
   Someday I'll perform the computation and figure it out.  */

struct hash_table_d
{
  /* The table itself.
     This is an array of `expr_hash_table_size' elements.  */
  struct expr **table;

  /* Size of the hash table, in elements.  */
  unsigned int size;

  /* Number of hash table elements.  */
  unsigned int n_elems;
};

/* Expression hash table.  */
static struct hash_table_d expr_hash_table;

/* This is a list of expressions which are MEMs and will be used by load
   or store motion.
   Load motion tracks MEMs which aren't killed by anything except itself,
   i.e. loads and stores to a single location.
   We can then allow movement of these MEM refs with a little special
   allowance.  (All stores copy the same value to the reaching reg used
   for the loads.)  This means all values used to store into memory must have
   no side effects so we can re-issue the setter value.  */

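/* For illustration (an example, not from the original comments): a MEM
   like (mem (symbol_ref "x")) that is only loaded, or loaded and stored
   such that every store copies the same reaching register, qualifies for
   load/store motion; a location that may also be written through an
   aliasing pointer would be flagged via the `invalid' field below.  */
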
struct ls_expr
{
  struct expr *expr;            /* Gcse expression reference for LM.  */
  rtx pattern;                  /* Pattern of this mem.  */
  rtx pattern_regs;             /* List of registers mentioned by the mem.  */
  rtx loads;                    /* INSN list of loads seen.  */
  rtx stores;                   /* INSN list of stores seen.  */
  struct ls_expr *next;         /* Next in the list.  */
  int invalid;                  /* Invalid for some reason.  */
  int index;                    /* If it maps to a bitmap index.  */
  unsigned int hash_index;      /* Index when in a hash table.  */
  rtx reaching_reg;             /* Register to use when re-writing.  */
};

/* Head of the list of load/store memory refs.  */
static struct ls_expr *pre_ldst_mems = NULL;

/* Hashtable for the load/store memory refs.  */
static htab_t pre_ldst_table = NULL;

/* Bitmap containing one bit for each register in the program.
   Used when performing GCSE to track which registers have been set since
   the start of the basic block.  */
static regset reg_set_bitmap;

/* Array, indexed by basic block number, of lists of insns which modify
   memory within that block.  */
static VEC (rtx,heap) **modify_mem_list;
static bitmap modify_mem_list_set;

typedef struct modify_pair_s
{
  rtx dest;                     /* A MEM.  */
  rtx dest_addr;                /* The canonical address of `dest'.  */
} modify_pair;

DEF_VEC_O(modify_pair);
DEF_VEC_ALLOC_O(modify_pair,heap);

/* This array parallels modify_mem_list, except that it stores MEMs
   being set and their canonicalized memory addresses.  */
static VEC (modify_pair,heap) **canon_modify_mem_list;

/* Bitmap indexed by block numbers to record which blocks contain
   function calls.  */
static bitmap blocks_with_calls;

/* Various variables for statistics gathering.  */

/* Memory used in a pass.
   This isn't intended to be absolutely precise.  Its intent is only
   to keep an eye on memory usage.  */
static int bytes_used;

/* GCSE substitutions made.  */
static int gcse_subst_count;
/* Number of copy instructions created.  */
static int gcse_create_count;
\f
/* Doing code hoisting.  */
static bool doing_code_hoisting_p = false;
\f
/* For available exprs.  */
static sbitmap *ae_kill;
\f
static void compute_can_copy (void);
static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
static void *gcse_alloc (unsigned long);
static void alloc_gcse_mem (void);
static void free_gcse_mem (void);
static void hash_scan_insn (rtx, struct hash_table_d *);
static void hash_scan_set (rtx, rtx, struct hash_table_d *);
static void hash_scan_clobber (rtx, rtx, struct hash_table_d *);
static void hash_scan_call (rtx, rtx, struct hash_table_d *);
static int want_to_gcse_p (rtx, int *);
static int oprs_unchanged_p (const_rtx, const_rtx, int);
static int oprs_anticipatable_p (const_rtx, const_rtx);
static int oprs_available_p (const_rtx, const_rtx);
static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int,
                                  struct hash_table_d *);
static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
static int expr_equiv_p (const_rtx, const_rtx);
static void record_last_reg_set_info (rtx, int);
static void record_last_mem_set_info (rtx);
static void record_last_set_info (rtx, const_rtx, void *);
static void compute_hash_table (struct hash_table_d *);
static void alloc_hash_table (struct hash_table_d *);
static void free_hash_table (struct hash_table_d *);
static void compute_hash_table_work (struct hash_table_d *);
static void dump_hash_table (FILE *, const char *, struct hash_table_d *);
static void compute_transp (const_rtx, int, sbitmap *);
static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
                                      struct hash_table_d *);
static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
static void canon_list_insert (rtx, const_rtx, void *);
static void alloc_pre_mem (int, int);
static void free_pre_mem (void);
static struct edge_list *compute_pre_data (void);
static int pre_expr_reaches_here_p (basic_block, struct expr *,
                                    basic_block);
static void insert_insn_end_basic_block (struct expr *, basic_block);
static void pre_insert_copy_insn (struct expr *, rtx);
static void pre_insert_copies (void);
static int pre_delete (void);
static int pre_gcse (struct edge_list *);
static int one_pre_gcse_pass (void);
static void add_label_notes (rtx, rtx);
static void alloc_code_hoist_mem (int, int);
static void free_code_hoist_mem (void);
static void compute_code_hoist_vbeinout (void);
static void compute_code_hoist_data (void);
static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *,
                                      int, int *);
static int hoist_code (void);
static int one_code_hoisting_pass (void);
static rtx process_insert_insn (struct expr *);
static int pre_edge_insert (struct edge_list *, struct expr **);
static int pre_expr_reaches_here_p_work (basic_block, struct expr *,
                                         basic_block, char *);
static struct ls_expr *ldst_entry (rtx);
static void free_ldst_entry (struct ls_expr *);
static void free_ld_motion_mems (void);
static void print_ldst_list (FILE *);
static struct ls_expr *find_rtx_in_ldst (rtx);
static int simple_mem (const_rtx);
static void invalidate_any_buried_refs (rtx);
static void compute_ld_motion_mems (void);
static void trim_ld_motion_mems (void);
static void update_ld_motion_stores (struct expr *);
static void clear_modify_mem_tables (void);
static void free_modify_mem_tables (void);
static rtx gcse_emit_move_after (rtx, rtx, rtx);
static bool is_too_expensive (const char *);

#define GNEW(T)         ((T *) gmalloc (sizeof (T)))
#define GCNEW(T)        ((T *) gcalloc (1, sizeof (T)))

#define GNEWVEC(T, N)   ((T *) gmalloc (sizeof (T) * (N)))
#define GCNEWVEC(T, N)  ((T *) gcalloc ((N), sizeof (T)))

#define GNEWVAR(T, S)   ((T *) gmalloc ((S)))
#define GCNEWVAR(T, S)  ((T *) gcalloc (1, (S)))

#define GOBNEW(T)       ((T *) gcse_alloc (sizeof (T)))
#define GOBNEWVAR(T, S) ((T *) gcse_alloc ((S)))
\f
/* Misc. utilities.  */

#define can_copy \
  (this_target_gcse->x_can_copy)
#define can_copy_init_p \
  (this_target_gcse->x_can_copy_init_p)

/* Compute which modes support reg/reg copy operations.  */

static void
compute_can_copy (void)
{
  int i;
#ifndef AVOID_CCMODE_COPIES
  rtx reg, insn;
#endif
  memset (can_copy, 0, NUM_MACHINE_MODES);

  start_sequence ();
  for (i = 0; i < NUM_MACHINE_MODES; i++)
    if (GET_MODE_CLASS (i) == MODE_CC)
      {
#ifdef AVOID_CCMODE_COPIES
        can_copy[i] = 0;
#else
        reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
        insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
        if (recog (PATTERN (insn), insn, NULL) >= 0)
          can_copy[i] = 1;
#endif
      }
    else
      can_copy[i] = 1;

  end_sequence ();
}

/* Returns whether the mode supports reg/reg copy operations.  */

bool
can_copy_p (enum machine_mode mode)
{
  if (! can_copy_init_p)
    {
      compute_can_copy ();
      can_copy_init_p = true;
    }

  return can_copy[mode] != 0;
}
\f
/* Cover function to xmalloc to record bytes allocated.  */

static void *
gmalloc (size_t size)
{
  bytes_used += size;
  return xmalloc (size);
}

/* Cover function to xcalloc to record bytes allocated.  */

static void *
gcalloc (size_t nelem, size_t elsize)
{
  bytes_used += nelem * elsize;
  return xcalloc (nelem, elsize);
}

/* Cover function to obstack_alloc.  */

static void *
gcse_alloc (unsigned long size)
{
  bytes_used += size;
  return obstack_alloc (&gcse_obstack, size);
}

/* Allocate memory for the reg/memory set tracking tables.
   This is called at the start of each pass.  */

static void
alloc_gcse_mem (void)
{
  /* Allocate vars to track sets of regs.  */
  reg_set_bitmap = ALLOC_REG_SET (NULL);

  /* Allocate array to keep a list of insns which modify memory in each
     basic block.  */
  modify_mem_list = GCNEWVEC (VEC (rtx,heap) *, last_basic_block);
  canon_modify_mem_list = GCNEWVEC (VEC (modify_pair,heap) *,
                                    last_basic_block);
  modify_mem_list_set = BITMAP_ALLOC (NULL);
  blocks_with_calls = BITMAP_ALLOC (NULL);
}

/* Free memory allocated by alloc_gcse_mem.  */

static void
free_gcse_mem (void)
{
  FREE_REG_SET (reg_set_bitmap);

  free_modify_mem_tables ();
  BITMAP_FREE (modify_mem_list_set);
  BITMAP_FREE (blocks_with_calls);
}
\f
/* Compute the local properties of each recorded expression.

   Local properties are those that are defined by the block, irrespective of
   other blocks.

   An expression is transparent in a block if its operands are not modified
   in the block.

   An expression is computed (locally available) in a block if it is computed
   at least once and the expression would contain the same value if the
   computation was moved to the end of the block.

   An expression is locally anticipatable in a block if it is computed at
   least once and the expression would contain the same value if the
   computation was moved to the beginning of the block.

   We call this routine for PRE and code hoisting.  Both compute
   basically the same information and thus can easily share this code.

   TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
   properties.  If NULL, then it is not necessary to compute or record that
   particular property.

   TABLE controls which hash table to look at.  */

static void
compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
                          struct hash_table_d *table)
{
  unsigned int i;

  /* Initialize any bitmaps that were passed in.  */
  if (transp)
    sbitmap_vector_ones (transp, last_basic_block);

  if (comp)
    sbitmap_vector_zero (comp, last_basic_block);
  if (antloc)
    sbitmap_vector_zero (antloc, last_basic_block);

  for (i = 0; i < table->size; i++)
    {
      struct expr *expr;

      for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
        {
          int indx = expr->bitmap_index;
          struct occr *occr;

          /* The expression is transparent in this block if it is not killed.
             We start by assuming all are transparent [none are killed], and
             then reset the bits for those that are.  */
          if (transp)
            compute_transp (expr->expr, indx, transp);

          /* The occurrences recorded in antic_occr are exactly those that
             we want to set to nonzero in ANTLOC.  */
          if (antloc)
            for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
              {
                SET_BIT (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);

                /* While we're scanning the table, this is a good place to
                   initialize this.  */
                occr->deleted_p = 0;
              }

          /* The occurrences recorded in avail_occr are exactly those that
             we want to set to nonzero in COMP.  */
          if (comp)
            for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
              {
                SET_BIT (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);

                /* While we're scanning the table, this is a good place to
                   initialize this.  */
                occr->copied_p = 0;
              }

          /* While we're scanning the table, this is a good place to
             initialize this.  */
          expr->reaching_reg = 0;
        }
    }
}
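
/* For illustration (an example applying the definitions above, not part
   of the original comments): in a block containing

       insn 1:  r5 = r1 + r2
       insn 2:  r1 = r3

   "r1 + r2" is locally anticipatable (it is computed before its operands
   are modified, so its ANTLOC bit is set) but not locally available (r1
   changes before the block end, so COMP stays clear), and the block is
   not transparent for it (TRANSP is reset) because r1 is modified.  */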
\f
/* Hash table support.  */

struct reg_avail_info
{
  basic_block last_bb;
  int first_set;
  int last_set;
};

static struct reg_avail_info *reg_avail_info;
static basic_block current_bb;

/* See whether X, the source of a set, is something we want to consider for
   GCSE.  */

static int
want_to_gcse_p (rtx x, int *max_distance_ptr)
{
#ifdef STACK_REGS
  /* On register stack architectures, don't GCSE constants from the
     constant pool, as the benefits are often swamped by the overhead
     of shuffling the register stack between basic blocks.  */
  if (IS_STACK_MODE (GET_MODE (x)))
    x = avoid_constant_pool_reference (x);
#endif

  /* GCSE'ing constants:

     We do not specifically distinguish between constant and non-constant
     expressions in PRE and Hoist.  We use set_src_cost below to limit
     the maximum distance simple expressions can travel.

     Nevertheless, constants are much easier to GCSE, and, hence,
     it is easy to overdo the optimizations.  Usually, excessive PRE and
     Hoisting of constants leads to increased register pressure.

     RA can deal with this by rematerializing some of the constants.
     Therefore, it is important that the back-end generates sets of constants
     in a way that allows reload to rematerialize them under high register
     pressure, i.e., a pseudo register with REG_EQUAL to constant
     is set only once.  Failing to do so will result in IRA/reload
     spilling such constants under high register pressure instead of
     rematerializing them.  */

  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
    case CALL:
      return 0;

    CASE_CONST_ANY:
      if (!doing_code_hoisting_p)
        /* Do not PRE constants.  */
        return 0;

      /* FALLTHRU */

    default:
      if (doing_code_hoisting_p)
        /* PRE doesn't implement max_distance restriction.  */
        {
          int cost;
          int max_distance;

          gcc_assert (!optimize_function_for_speed_p (cfun)
                      && optimize_function_for_size_p (cfun));
          cost = set_src_cost (x, 0);

          if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST))
            {
              max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10;
              if (max_distance == 0)
                return 0;

              gcc_assert (max_distance > 0);
            }
          else
            max_distance = 0;

          if (max_distance_ptr)
            *max_distance_ptr = max_distance;
        }

      return can_assign_to_reg_without_clobbers_p (x);
    }
}
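
/* For illustration (arithmetic only; the default of 10 assumed here for
   the GCSE_COST_DISTANCE_RATIO --param is an assumption): if set_src_cost
   returns 4 for a hoisting candidate, max_distance becomes
   (10 * 4) / 10 = 4 insns; a cost of 0 yields max_distance == 0 and the
   expression is rejected outright above.  */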

/* Used internally by can_assign_to_reg_without_clobbers_p.  */

static GTY(()) rtx test_insn;

/* Return true if we can assign X to a pseudo register such that the
   resulting insn does not result in clobbering a hard register as a
   side-effect.

   Additionally, if the target requires it, check that the resulting insn
   can be copied.  If it cannot, this means that X is special and probably
   has hidden side-effects we don't want to mess with.

   This function is typically used by code motion passes, to verify
   that it is safe to insert an insn without worrying about clobbering
   maybe live hard regs.  */

bool
can_assign_to_reg_without_clobbers_p (rtx x)
{
  int num_clobbers = 0;
  int icode;

  /* If this is a valid operand, we are OK.  If it's VOIDmode, we aren't.  */
  if (general_operand (x, GET_MODE (x)))
    return 1;
  else if (GET_MODE (x) == VOIDmode)
    return 0;

  /* Otherwise, check if we can make a valid insn from it.  First initialize
     our test insn if we haven't already.  */
  if (test_insn == 0)
    {
      test_insn
        = make_insn_raw (gen_rtx_SET (VOIDmode,
                                      gen_rtx_REG (word_mode,
                                                   FIRST_PSEUDO_REGISTER * 2),
                                      const0_rtx));
      NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
    }

  /* Now make an insn like the one we would make when GCSE'ing and see if
     valid.  */
  PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
  SET_SRC (PATTERN (test_insn)) = x;

  icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
  if (icode < 0)
    return false;

  if (num_clobbers > 0 && added_clobbers_hard_reg_p (icode))
    return false;

  if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (test_insn))
    return false;

  return true;
}
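
/* A hypothetical caller (illustration only, not from the sources):

     if (can_assign_to_reg_without_clobbers_p (expr))
       ... safe to emit (set (reg:M new_pseudo) expr); recognizing the
           new insn will not add clobbers of hard registers ...  */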

/* Return nonzero if the operands of expression X are unchanged from the
   start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
   or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */

static int
oprs_unchanged_p (const_rtx x, const_rtx insn, int avail_p)
{
  int i, j;
  enum rtx_code code;
  const char *fmt;

  if (x == 0)
    return 1;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      {
        struct reg_avail_info *info = &reg_avail_info[REGNO (x)];

        if (info->last_bb != current_bb)
          return 1;
        if (avail_p)
          return info->last_set < DF_INSN_LUID (insn);
        else
          return info->first_set >= DF_INSN_LUID (insn);
      }

    case MEM:
      if (load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
                                  x, avail_p))
        return 0;
      else
        return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
    case PRE_MODIFY:
    case POST_MODIFY:
      return 0;

    case PC:
    case CC0: /*FIXME*/
    case CONST:
    CASE_CONST_ANY:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return 1;

    default:
      break;
    }

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
        {
          /* If we are about to do the last recursive call needed at this
             level, change it into iteration.  This function is called enough
             to be worth it.  */
          if (i == 0)
            return oprs_unchanged_p (XEXP (x, i), insn, avail_p);

          else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
            return 0;
        }
      else if (fmt[i] == 'E')
        for (j = 0; j < XVECLEN (x, i); j++)
          if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
            return 0;
    }

  return 1;
}
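
/* For illustration (LUID numbering assumed, not from the original
   comments): in a block

       luid 10:  r7 = r1 + r2
       luid 11:  r1 = r3

   the operands of "r1 + r2" are unchanged before luid 10 (the first set
   of r1 has luid 11 >= 10, so the expression is anticipatable there),
   but changed after it (the last set, luid 11, is not < 10, so the
   expression is not available).  */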

/* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p.  */

struct mem_conflict_info
{
  /* A memory reference for a load instruction; mems_conflict_for_gcse_p
     will see if a memory store conflicts with this memory load.  */
  const_rtx mem;

  /* True if mems_conflict_for_gcse_p finds a conflict between two memory
     references.  */
  bool conflict;
};

/* DEST is the output of an instruction.  If it is a memory reference and
   possibly conflicts with the load found in DATA, then communicate this
   information back through DATA.  */

static void
mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
                          void *data)
{
  struct mem_conflict_info *mci = (struct mem_conflict_info *) data;

  while (GET_CODE (dest) == SUBREG
         || GET_CODE (dest) == ZERO_EXTRACT
         || GET_CODE (dest) == STRICT_LOW_PART)
    dest = XEXP (dest, 0);

  /* If DEST is not a MEM, then it will not conflict with the load.  Note
     that function calls are assumed to clobber memory, but are handled
     elsewhere.  */
  if (! MEM_P (dest))
    return;

  /* If we are setting a MEM in our list of specially recognized MEMs,
     don't mark as killed this time.  */
  if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem))
    {
      if (!find_rtx_in_ldst (dest))
        mci->conflict = true;
      return;
    }

  if (true_dependence (dest, GET_MODE (dest), mci->mem))
    mci->conflict = true;
}

/* Return nonzero if the expression in X (a memory reference) is killed
   in block BB before or after the insn with the LUID in UID_LIMIT.
   AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
   before UID_LIMIT.

   To check the entire block, set UID_LIMIT to max_uid + 1 and
   AVAIL_P to 0.  */

static int
load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
                        int avail_p)
{
  VEC (rtx,heap) *list = modify_mem_list[bb->index];
  rtx setter;
  unsigned ix;

  /* If this is a readonly MEM, we aren't going to be changing it.  */
  if (MEM_READONLY_P (x))
    return 0;

  FOR_EACH_VEC_ELT_REVERSE (rtx, list, ix, setter)
    {
      struct mem_conflict_info mci;

      /* Ignore entries in the list that do not apply.  */
      if ((avail_p
           && DF_INSN_LUID (setter) < uid_limit)
          || (! avail_p
              && DF_INSN_LUID (setter) > uid_limit))
        continue;

      /* If SETTER is a call everything is clobbered.  Note that calls
         to pure functions are never put on the list, so we need not
         worry about them.  */
      if (CALL_P (setter))
        return 1;

      /* SETTER must be an INSN of some kind that sets memory.  Call
         note_stores to examine each hunk of memory that is modified.  */
      mci.mem = x;
      mci.conflict = false;
      note_stores (PATTERN (setter), mems_conflict_for_gcse_p, &mci);
      if (mci.conflict)
        return 1;
    }
  return 0;
}

/* Return nonzero if the operands of expression X are unchanged from
   the start of INSN's basic block up to but not including INSN.  */

static int
oprs_anticipatable_p (const_rtx x, const_rtx insn)
{
  return oprs_unchanged_p (x, insn, 0);
}

/* Return nonzero if the operands of expression X are unchanged from
   INSN to the end of INSN's basic block.  */

static int
oprs_available_p (const_rtx x, const_rtx insn)
{
  return oprs_unchanged_p (x, insn, 1);
}

/* Hash expression X.

   MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
   indicating if a volatile operand is found or if the expression contains
   something we don't want to insert in the table.  HASH_TABLE_SIZE is
   the current size of the hash table to be probed.  */

static unsigned int
hash_expr (const_rtx x, enum machine_mode mode, int *do_not_record_p,
           int hash_table_size)
{
  unsigned int hash;

  *do_not_record_p = 0;

  hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false);
  return hash % hash_table_size;
}

/* Return nonzero if exp1 is equivalent to exp2.  */

static int
expr_equiv_p (const_rtx x, const_rtx y)
{
  return exp_equiv_p (x, y, 0, true);
}

/* Insert expression X in INSN in the hash TABLE.
   If it is already present, record it as the last occurrence in INSN's
   basic block.

   MODE is the mode of the value X is being stored into.
   It is only used if X is a CONST_INT.

   ANTIC_P is nonzero if X is an anticipatable expression.
   AVAIL_P is nonzero if X is an available expression.

   MAX_DISTANCE is the maximum distance in instructions this expression can
   be moved.  */

static void
insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
                      int avail_p, int max_distance,
                      struct hash_table_d *table)
{
  int found, do_not_record_p;
  unsigned int hash;
  struct expr *cur_expr, *last_expr = NULL;
  struct occr *antic_occr, *avail_occr;

  hash = hash_expr (x, mode, &do_not_record_p, table->size);

  /* Do not insert expression in table if it contains volatile operands,
     or if hash_expr determines the expression is something we don't want
     to or can't handle.  */
  if (do_not_record_p)
    return;

  cur_expr = table->table[hash];
  found = 0;

  while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
    {
      /* If the expression isn't found, save a pointer to the end of
         the list.  */
      last_expr = cur_expr;
      cur_expr = cur_expr->next_same_hash;
    }

  if (! found)
    {
      cur_expr = GOBNEW (struct expr);
      bytes_used += sizeof (struct expr);
      if (table->table[hash] == NULL)
        /* This is the first pattern that hashed to this index.  */
        table->table[hash] = cur_expr;
      else
        /* Add EXPR to end of this hash chain.  */
        last_expr->next_same_hash = cur_expr;

      /* Set the fields of the expr element.  */
      cur_expr->expr = x;
      cur_expr->bitmap_index = table->n_elems++;
      cur_expr->next_same_hash = NULL;
      cur_expr->antic_occr = NULL;
      cur_expr->avail_occr = NULL;
      gcc_assert (max_distance >= 0);
      cur_expr->max_distance = max_distance;
    }
  else
    gcc_assert (cur_expr->max_distance == max_distance);

  /* Now record the occurrence(s).  */
  if (antic_p)
    {
      antic_occr = cur_expr->antic_occr;

      if (antic_occr
          && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
        antic_occr = NULL;

      if (antic_occr)
        /* Found another instance of the expression in the same basic block.
           Prefer the currently recorded one.  We want the first one in the
           block and the block is scanned from start to end.  */
        ; /* nothing to do */
      else
        {
          /* First occurrence of this expression in this basic block.  */
          antic_occr = GOBNEW (struct occr);
          bytes_used += sizeof (struct occr);
          antic_occr->insn = insn;
          antic_occr->next = cur_expr->antic_occr;
          antic_occr->deleted_p = 0;
          cur_expr->antic_occr = antic_occr;
        }
    }

  if (avail_p)
    {
      avail_occr = cur_expr->avail_occr;

      if (avail_occr
          && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
        {
          /* Found another instance of the expression in the same basic block.
             Prefer this occurrence to the currently recorded one.  We want
             the last one in the block and the block is scanned from start
             to end.  */
          avail_occr->insn = insn;
        }
      else
        {
          /* First occurrence of this expression in this basic block.  */
          avail_occr = GOBNEW (struct occr);
          bytes_used += sizeof (struct occr);
          avail_occr->insn = insn;
          avail_occr->next = cur_expr->avail_occr;
          avail_occr->deleted_p = 0;
          cur_expr->avail_occr = avail_occr;
        }
    }
}
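
/* For illustration (an example, not from the original comments): if a
   block computes the same expression at insns I1 and then I2, the code
   above keeps I1 in antic_occr (the first occurrence is what matters at
   the block start) but overwrites avail_occr to keep I2 (the last
   occurrence is what reaches the block end).  */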

/* Scan SET present in INSN and add an entry to the hash TABLE.  */

static void
hash_scan_set (rtx set, rtx insn, struct hash_table_d *table)
{
  rtx src = SET_SRC (set);
  rtx dest = SET_DEST (set);
  rtx note;

  if (GET_CODE (src) == CALL)
    hash_scan_call (src, insn, table);

  else if (REG_P (dest))
    {
      unsigned int regno = REGNO (dest);
      int max_distance = 0;

      /* See if a REG_EQUAL note shows this equivalent to a simpler expression.

         This allows us to do a single GCSE pass and still eliminate
         redundant constants, addresses or other expressions that are
         constructed with multiple instructions.

         However, keep the original SRC if INSN is a simple reg-reg move.
         In this case, there will almost always be a REG_EQUAL note on the
         insn that sets SRC.  By recording the REG_EQUAL value here as SRC
         for INSN, we miss copy propagation opportunities and we perform the
         same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
         do more than one PRE GCSE pass.

         Note that this does not impede profitable constant propagations.  We
         "look through" reg-reg sets in lookup_avail_set.  */
      note = find_reg_equal_equiv_note (insn);
      if (note != 0
          && REG_NOTE_KIND (note) == REG_EQUAL
          && !REG_P (src)
          && want_to_gcse_p (XEXP (note, 0), NULL))
        src = XEXP (note, 0), set = gen_rtx_SET (VOIDmode, dest, src);

      /* Only record sets of pseudo-regs in the hash table.  */
      if (regno >= FIRST_PSEUDO_REGISTER
          /* Don't GCSE something if we can't do a reg/reg copy.  */
          && can_copy_p (GET_MODE (dest))
          /* GCSE commonly inserts instruction after the insn.  We can't
             do that easily for EH edges so disable GCSE on these for now.  */
          /* ??? We can now easily create new EH landing pads at the
             gimple level, for splitting edges; there's no reason we
             can't do the same thing at the rtl level.  */
          && !can_throw_internal (insn)
          /* Is SET_SRC something we want to gcse?  */
          && want_to_gcse_p (src, &max_distance)
          /* Don't CSE a nop.  */
          && ! set_noop_p (set)
          /* Don't GCSE if it has attached REG_EQUIV note.
             At this point only function parameters should have
             REG_EQUIV notes and if the argument slot is used somewhere
             explicitly, it means the address of the parameter has been
             taken, so we should not extend the lifetime of the pseudo.  */
          && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
        {
          /* An expression is not anticipatable if its operands are
             modified before this insn or if this is not the only SET in
             this insn.  The latter condition does not have to mean that
             SRC itself is not anticipatable, but we just will not be
             able to handle code motion of insns with multiple sets.  */
          int antic_p = oprs_anticipatable_p (src, insn)
                        && !multiple_sets (insn);
          /* An expression is not available if its operands are
             subsequently modified, including this insn.  It's also not
             available if this is a branch, because we can't insert
             a set after the branch.  */
          int avail_p = (oprs_available_p (src, insn)
                         && ! JUMP_P (insn));

          insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
                                max_distance, table);
        }
    }
  /* In case of store we want to consider the memory value as available in
     the REG stored in that memory.  This makes it possible to remove
     redundant loads due to stores to the same location.  */
  else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
    {
      unsigned int regno = REGNO (src);
      int max_distance = 0;

      /* Only record sets of pseudo-regs in the hash table.  */
      if (regno >= FIRST_PSEUDO_REGISTER
          /* Don't GCSE something if we can't do a reg/reg copy.  */
          && can_copy_p (GET_MODE (src))
          /* GCSE commonly inserts instruction after the insn.  We can't
             do that easily for EH edges so disable GCSE on these for now.  */
          && !can_throw_internal (insn)
          /* Is SET_DEST something we want to gcse?  */
          && want_to_gcse_p (dest, &max_distance)
          /* Don't CSE a nop.  */
          && ! set_noop_p (set)
          /* Don't GCSE if it has attached REG_EQUIV note.
             At this point only function parameters should have
             REG_EQUIV notes and if the argument slot is used somewhere
             explicitly, it means the address of the parameter has been
             taken, so we should not extend the lifetime of the pseudo.  */
          && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
              || ! MEM_P (XEXP (note, 0))))
        {
          /* Stores are never anticipatable.  */
          int antic_p = 0;
          /* An expression is not available if its operands are
             subsequently modified, including this insn.  It's also not
             available if this is a branch, because we can't insert
             a set after the branch.  */
          int avail_p = oprs_available_p (dest, insn)
                        && ! JUMP_P (insn);

          /* Record the memory expression (DEST) in the hash table.  */
          insert_expr_in_table (dest, GET_MODE (dest), insn,
                                antic_p, avail_p, max_distance, table);
        }
    }
}

static void
hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
                   struct hash_table_d *table ATTRIBUTE_UNUSED)
{
  /* Currently nothing to do.  */
}

static void
hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
                struct hash_table_d *table ATTRIBUTE_UNUSED)
{
  /* Currently nothing to do.  */
}

/* Process INSN and add hash table entries as appropriate.  */

static void
hash_scan_insn (rtx insn, struct hash_table_d *table)
{
  rtx pat = PATTERN (insn);
  int i;

  /* Pick out the sets of INSN and for other forms of instructions record
     what's been modified.  */

  if (GET_CODE (pat) == SET)
    hash_scan_set (pat, insn, table);

  else if (GET_CODE (pat) == CLOBBER)
    hash_scan_clobber (pat, insn, table);

  else if (GET_CODE (pat) == CALL)
    hash_scan_call (pat, insn, table);

  else if (GET_CODE (pat) == PARALLEL)
    for (i = 0; i < XVECLEN (pat, 0); i++)
      {
        rtx x = XVECEXP (pat, 0, i);

        if (GET_CODE (x) == SET)
          hash_scan_set (x, insn, table);
        else if (GET_CODE (x) == CLOBBER)
          hash_scan_clobber (x, insn, table);
        else if (GET_CODE (x) == CALL)
          hash_scan_call (x, insn, table);
      }
}

/* Dump the hash table TABLE to file FILE under the name NAME.  */

static void
dump_hash_table (FILE *file, const char *name, struct hash_table_d *table)
{
  int i;
  /* Flattened out table, so it's printed in proper order.  */
  struct expr **flat_table;
  unsigned int *hash_val;
  struct expr *expr;

  flat_table = XCNEWVEC (struct expr *, table->n_elems);
  hash_val = XNEWVEC (unsigned int, table->n_elems);

  for (i = 0; i < (int) table->size; i++)
    for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
      {
        flat_table[expr->bitmap_index] = expr;
        hash_val[expr->bitmap_index] = i;
      }

  fprintf (file, "%s hash table (%d buckets, %d entries)\n",
           name, table->size, table->n_elems);

  for (i = 0; i < (int) table->n_elems; i++)
    if (flat_table[i] != 0)
      {
        expr = flat_table[i];
        fprintf (file, "Index %d (hash value %d; max distance %d)\n  ",
                 expr->bitmap_index, hash_val[i], expr->max_distance);
        print_rtl (file, expr->expr);
        fprintf (file, "\n");
      }

  fprintf (file, "\n");

  free (flat_table);
  free (hash_val);
}

/* Record register first/last/block set information for REGNO in INSN.

   first_set records the first place in the block where the register
   is set and is used to compute "anticipatability".

   last_set records the last place in the block where the register
   is set and is used to compute "availability".

   last_bb records the block for which first_set and last_set are
   valid, as a quick test to invalidate them.  */

static void
record_last_reg_set_info (rtx insn, int regno)
{
  struct reg_avail_info *info = &reg_avail_info[regno];
  int luid = DF_INSN_LUID (insn);

  info->last_set = luid;
  if (info->last_bb != current_bb)
    {
      info->last_bb = current_bb;
      info->first_set = luid;
    }
}

/* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
   Note we store a pair of elements in the list, so they have to be
   taken off pairwise.  */

static void
canon_list_insert (rtx dest ATTRIBUTE_UNUSED, const_rtx x ATTRIBUTE_UNUSED,
                   void * v_insn)
{
  rtx dest_addr, insn;
  int bb;
  modify_pair pair;

  while (GET_CODE (dest) == SUBREG
         || GET_CODE (dest) == ZERO_EXTRACT
         || GET_CODE (dest) == STRICT_LOW_PART)
    dest = XEXP (dest, 0);

  /* If DEST is not a MEM, then it will not conflict with a load.  Note
     that function calls are assumed to clobber memory, but are handled
     elsewhere.  */

  if (! MEM_P (dest))
    return;

  dest_addr = get_addr (XEXP (dest, 0));
  dest_addr = canon_rtx (dest_addr);
  insn = (rtx) v_insn;
  bb = BLOCK_FOR_INSN (insn)->index;

  pair.dest = dest;
  pair.dest_addr = dest_addr;
  VEC_safe_push (modify_pair, heap, canon_modify_mem_list[bb], pair);
}

/* Record memory modification information for INSN.  We do not actually care
   about the memory location(s) that are set, or even how they are set
   (consider a CALL_INSN).  We merely need to record which insns modify
   memory.  */

static void
record_last_mem_set_info (rtx insn)
{
  int bb = BLOCK_FOR_INSN (insn)->index;

  /* load_killed_in_block_p will handle the case of calls clobbering
     everything.  */
  VEC_safe_push (rtx, heap, modify_mem_list[bb], insn);
  bitmap_set_bit (modify_mem_list_set, bb);

  if (CALL_P (insn))
    bitmap_set_bit (blocks_with_calls, bb);
  else
    note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
}

/* Called from compute_hash_table via note_stores to handle one
   SET or CLOBBER in an insn.  DATA is really the instruction in which
   the SET is taking place.  */

static void
record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
{
  rtx last_set_insn = (rtx) data;

  if (GET_CODE (dest) == SUBREG)
    dest = SUBREG_REG (dest);

  if (REG_P (dest))
    record_last_reg_set_info (last_set_insn, REGNO (dest));
  else if (MEM_P (dest)
           /* Ignore pushes, they clobber nothing.  */
           && ! push_operand (dest, GET_MODE (dest)))
    record_last_mem_set_info (last_set_insn);
}
1483
e45425ec 1484/* Top level function to create an expression hash table.
7506f491
DE
1485
1486 Expression entries are placed in the hash table if
1487 - they are of the form (set (pseudo-reg) src),
1488 - src is something we want to perform GCSE on,
1489 - none of the operands are subsequently modified in the block
1490
7506f491
DE
1491 Currently src must be a pseudo-reg or a const_int.
1492
02280659 1493 TABLE is the table computed. */
7506f491
DE
1494
1495static void
7e5487a2 1496compute_hash_table_work (struct hash_table_d *table)
7506f491 1497{
5f39ad47 1498 int i;
7506f491 1499
a13d4ebf 1500 /* Discard any memory-modification records cached by a previous pass. */
73991d6a 1501 clear_modify_mem_tables ();
7506f491 1502 /* Some working arrays used to track first and last set in each block. */
5f39ad47 1503 reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
80c29cc4 1504
5f39ad47 1505 for (i = 0; i < max_reg_num (); ++i)
e0082a72 1506 reg_avail_info[i].last_bb = NULL;
7506f491 1507
e0082a72 1508 FOR_EACH_BB (current_bb)
7506f491
DE
1509 {
1510 rtx insn;
770ae6cc 1511 unsigned int regno;
7506f491
DE
1512
1513 /* First pass over the instructions records information used to
4a81774c 1514 determine when registers and memory are first and last set. */
eb232f4e 1515 FOR_BB_INSNS (current_bb, insn)
7506f491 1516 {
a344c9f1 1517 if (!NONDEBUG_INSN_P (insn))
7506f491
DE
1518 continue;
1519
7b1b4aed 1520 if (CALL_P (insn))
7506f491 1521 {
c7fb4c7a
SB
1522 hard_reg_set_iterator hrsi;
1523 EXECUTE_IF_SET_IN_HARD_REG_SET (regs_invalidated_by_call,
1524 0, regno, hrsi)
1525 record_last_reg_set_info (insn, regno);
c4c81601 1526
e45425ec
SB
1527 if (! RTL_CONST_OR_PURE_CALL_P (insn))
1528 record_last_mem_set_info (insn);
7506f491
DE
1529 }
1530
84832317 1531 note_stores (PATTERN (insn), record_last_set_info, insn);
7506f491
DE
1532 }
1533
1534 /* The next pass builds the hash table. */
eb232f4e 1535 FOR_BB_INSNS (current_bb, insn)
a344c9f1 1536 if (NONDEBUG_INSN_P (insn))
4a8cae83 1537 hash_scan_insn (insn, table);
7506f491
DE
1538 }
1539
80c29cc4
RZ
1540 free (reg_avail_info);
1541 reg_avail_info = NULL;
7506f491
DE
1542}
1543
02280659 1544/* Allocate space for the set/expr hash TABLE.
e45425ec 1545 The function's insn count is used to determine the number of buckets. */
7506f491
DE
1546
1547static void
e45425ec 1548alloc_hash_table (struct hash_table_d *table)
7506f491
DE
1549{
1550 int n;
1551
b5b8b0ac
AO
1552 n = get_max_insn_count ();
1553
1554 table->size = n / 4;
02280659
ZD
1555 if (table->size < 11)
1556 table->size = 11;
c4c81601 1557
7506f491
DE
1558 /* Attempt to maintain efficient use of hash table.
1559 Making it an odd number is simplest for now.
1560 ??? Later take some measurements. */
02280659
ZD
1561 table->size |= 1;
1562 n = table->size * sizeof (struct expr *);
1b4572a8 1563 table->table = GNEWVAR (struct expr *, n);
7506f491
DE
1564}
1565
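/* A worked sizing example (illustrative numbers): for a function with
   roughly 1000 insns, n / 4 gives 250 buckets; the "|= 1" in
   alloc_hash_table then bumps this to 251, an odd bucket count, which
   tends to spread entries more evenly under simple modulo hashing. */
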
02280659 1566/* Free things allocated by alloc_hash_table. */
7506f491
DE
1567
1568static void
7e5487a2 1569free_hash_table (struct hash_table_d *table)
7506f491 1570{
02280659 1571 free (table->table);
7506f491
DE
1572}
1573
e45425ec 1574/* Compute the expression hash table TABLE. */
7506f491
DE
1575
1576static void
7e5487a2 1577compute_hash_table (struct hash_table_d *table)
7506f491
DE
1578{
1579 /* Initialize count of number of entries in hash table. */
02280659 1580 table->n_elems = 0;
703ad42b 1581 memset (table->table, 0, table->size * sizeof (struct expr *));
7506f491 1582
02280659 1583 compute_hash_table_work (table);
7506f491
DE
1584}
1585\f
1586/* Expression tracking support. */
1587
e45425ec
SB
1588/* Clear canon_modify_mem_list and modify_mem_list tables. */
1589static void
1590clear_modify_mem_tables (void)
0e3f0221 1591{
e45425ec
SB
1592 unsigned i;
1593 bitmap_iterator bi;
0e3f0221 1594
e45425ec 1595 EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
0e3f0221 1596 {
6409abe3 1597 VEC_free (rtx, heap, modify_mem_list[i]);
6ce1edcf 1598 VEC_free (modify_pair, heap, canon_modify_mem_list[i]);
0e3f0221 1599 }
e45425ec
SB
1600 bitmap_clear (modify_mem_list_set);
1601 bitmap_clear (blocks_with_calls);
0e3f0221
RS
1602}
1603
e45425ec 1604/* Release memory used by the modify_mem_list and canon_modify_mem_list tables. */
0e3f0221 1605
e45425ec
SB
1606static void
1607free_modify_mem_tables (void)
e129b3f9 1608{
e45425ec
SB
1609 clear_modify_mem_tables ();
1610 free (modify_mem_list);
1611 free (canon_modify_mem_list);
1612 modify_mem_list = 0;
1613 canon_modify_mem_list = 0;
e129b3f9 1614}
e45425ec
SB
1615\f
1616/* For each block, compute whether X is transparent. X is either an
1617 expression or an assignment [though we don't care which, for this context
1618 an assignment is treated as an expression]. For each block where an
1619 element of X is modified, reset the INDX bit in BMAP. */
0e3f0221 1620
e45425ec
SB
1621static void
1622compute_transp (const_rtx x, int indx, sbitmap *bmap)
0e3f0221 1623{
e45425ec
SB
1624 int i, j;
1625 enum rtx_code code;
1626 const char *fmt;
0e3f0221 1627
e45425ec
SB
 1628 /* repeat is used to turn tail recursion into iteration by hand, since
 1629 GCC can't be relied on to do that when there's no return value. */
1630 repeat:
0e3f0221 1631
e45425ec
SB
1632 if (x == 0)
1633 return;
72b8d451 1634
e45425ec
SB
1635 code = GET_CODE (x);
1636 switch (code)
0e3f0221 1637 {
e45425ec 1638 case REG:
628f6a4e 1639 {
e45425ec
SB
1640 df_ref def;
1641 for (def = DF_REG_DEF_CHAIN (REGNO (x));
1642 def;
1643 def = DF_REF_NEXT_REG (def))
1644 RESET_BIT (bmap[DF_REF_BB (def)->index], indx);
628f6a4e 1645 }
7821bfc7 1646
e45425ec 1647 return;
72b8d451 1648
e45425ec
SB
1649 case MEM:
1650 if (! MEM_READONLY_P (x))
0e3f0221 1651 {
e45425ec
SB
1652 bitmap_iterator bi;
1653 unsigned bb_index;
e129b3f9 1654
e45425ec
SB
1655 /* First handle all the blocks with calls. We don't need to
1656 do any list walking for them. */
1657 EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls, 0, bb_index, bi)
1658 {
1659 RESET_BIT (bmap[bb_index], indx);
1660 }
0e3f0221 1661
e45425ec
SB
1662 /* Now iterate over the blocks which have memory modifications
1663 but which do not have any calls. */
1664 EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set,
1665 blocks_with_calls,
1666 0, bb_index, bi)
1667 {
6409abe3 1668 VEC (modify_pair,heap) *list
6ce1edcf
NF
1669 = canon_modify_mem_list[bb_index];
1670 modify_pair *pair;
1671 unsigned ix;
0e3f0221 1672
6ce1edcf 1673 FOR_EACH_VEC_ELT_REVERSE (modify_pair, list, ix, pair)
e45425ec 1674 {
6ce1edcf
NF
1675 rtx dest = pair->dest;
1676 rtx dest_addr = pair->dest_addr;
0e3f0221 1677
53d9622b
RS
1678 if (canon_true_dependence (dest, GET_MODE (dest),
1679 dest_addr, x, NULL_RTX))
6ce1edcf 1680 RESET_BIT (bmap[bb_index], indx);
e45425ec
SB
1681 }
1682 }
0e3f0221 1683 }
0e3f0221 1684
e45425ec
SB
1685 x = XEXP (x, 0);
1686 goto repeat;
0e3f0221 1687
e45425ec
SB
1688 case PC:
1689 case CC0: /*FIXME*/
1690 case CONST:
d8116890 1691 CASE_CONST_ANY:
e45425ec
SB
1692 case SYMBOL_REF:
1693 case LABEL_REF:
1694 case ADDR_VEC:
1695 case ADDR_DIFF_VEC:
1696 return;
0e3f0221 1697
e45425ec
SB
1698 default:
1699 break;
1700 }
7821bfc7 1701
e45425ec 1702 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
0e3f0221 1703 {
e45425ec 1704 if (fmt[i] == 'e')
0e3f0221 1705 {
e45425ec
SB
1706 /* If we are about to do the last recursive call
1707 needed at this level, change it into iteration.
1708 This function is called enough to be worth it. */
1709 if (i == 0)
1710 {
1711 x = XEXP (x, i);
1712 goto repeat;
1713 }
1714
1715 compute_transp (XEXP (x, i), indx, bmap);
0e3f0221 1716 }
e45425ec
SB
1717 else if (fmt[i] == 'E')
1718 for (j = 0; j < XVECLEN (x, i); j++)
1719 compute_transp (XVECEXP (x, i, j), indx, bmap);
0e3f0221 1720 }
0e3f0221
RS
1721}
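/* Illustrative example (hypothetical RTL): for
   X = (plus (reg 68) (mem (reg 69))), the REG case clears the INDX bit
   of BMAP for every block containing a definition of reg 68, and the
   MEM case clears it for every block with a call or with a store that
   may alias (mem (reg 69)).  X is transparent in whatever blocks keep
   their bit set. */
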
1722\f
a65f3558 1723/* Compute PRE+LCM working variables. */
7506f491
DE
1724
1725/* Local properties of expressions. */
43c8a043 1726
7506f491 1727/* Nonzero for expressions that are transparent in the block. */
a65f3558 1728static sbitmap *transp;
7506f491 1729
a65f3558
JL
1730/* Nonzero for expressions that are computed (available) in the block. */
1731static sbitmap *comp;
7506f491 1732
a65f3558
JL
1733/* Nonzero for expressions that are locally anticipatable in the block. */
1734static sbitmap *antloc;
7506f491 1735
a65f3558
JL
1736/* Nonzero for expressions where this block is an optimal computation
1737 point. */
1738static sbitmap *pre_optimal;
5c35539b 1739
a65f3558
JL
1740/* Nonzero for expressions which are redundant in a particular block. */
1741static sbitmap *pre_redundant;
7506f491 1742
a42cd965
AM
1743/* Nonzero for expressions which should be inserted on a specific edge. */
1744static sbitmap *pre_insert_map;
1745
1746/* Nonzero for expressions which should be deleted in a specific block. */
1747static sbitmap *pre_delete_map;
1748
a65f3558 1749/* Allocate vars used for PRE analysis. */
7506f491
DE
1750
1751static void
1d088dee 1752alloc_pre_mem (int n_blocks, int n_exprs)
7506f491 1753{
a65f3558
JL
1754 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
1755 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
1756 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
5faf03ae 1757
a42cd965
AM
1758 pre_optimal = NULL;
1759 pre_redundant = NULL;
1760 pre_insert_map = NULL;
1761 pre_delete_map = NULL;
a42cd965 1762 ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
c4c81601 1763
a42cd965 1764 /* pre_insert and pre_delete are allocated later. */
7506f491
DE
1765}
1766
a65f3558 1767/* Free vars used for PRE analysis. */
7506f491
DE
1768
1769static void
1d088dee 1770free_pre_mem (void)
7506f491 1771{
5a660bff
DB
1772 sbitmap_vector_free (transp);
1773 sbitmap_vector_free (comp);
bd3675fc
JL
1774
1775 /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
7506f491 1776
a42cd965 1777 if (pre_optimal)
5a660bff 1778 sbitmap_vector_free (pre_optimal);
a42cd965 1779 if (pre_redundant)
5a660bff 1780 sbitmap_vector_free (pre_redundant);
a42cd965 1781 if (pre_insert_map)
5a660bff 1782 sbitmap_vector_free (pre_insert_map);
a42cd965 1783 if (pre_delete_map)
5a660bff 1784 sbitmap_vector_free (pre_delete_map);
a42cd965 1785
bd3675fc 1786 transp = comp = NULL;
a42cd965 1787 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
7506f491
DE
1788}
1789
9b774782
JL
1790/* Remove certain expressions from anticipatable and transparent
 1791 sets of basic blocks that have an incoming abnormal edge.
1792 For PRE remove potentially trapping expressions to avoid placing
1793 them on abnormal edges. For hoisting remove memory references that
1794 can be clobbered by calls. */
7506f491
DE
1795
1796static void
9b774782 1797prune_expressions (bool pre_p)
7506f491 1798{
9b774782 1799 sbitmap prune_exprs;
43c8a043 1800 struct expr *expr;
b614171e 1801 unsigned int ui;
9b774782 1802 basic_block bb;
c66e8ae9 1803
9b774782
JL
1804 prune_exprs = sbitmap_alloc (expr_hash_table.n_elems);
1805 sbitmap_zero (prune_exprs);
02280659 1806 for (ui = 0; ui < expr_hash_table.size; ui++)
b614171e 1807 {
43c8a043 1808 for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash)
9b774782
JL
1809 {
1810 /* Note potentially trapping expressions. */
43c8a043 1811 if (may_trap_p (expr->expr))
9b774782 1812 {
43c8a043 1813 SET_BIT (prune_exprs, expr->bitmap_index);
9b774782
JL
1814 continue;
1815 }
b614171e 1816
43c8a043 1817 if (!pre_p && MEM_P (expr->expr))
9b774782
JL
1818 /* Note memory references that can be clobbered by a call.
1819 We do not split abnormal edges in hoisting, so would
1820 a memory reference get hoisted along an abnormal edge,
1821 it would be placed /before/ the call. Therefore, only
1822 constant memory references can be hoisted along abnormal
1823 edges. */
1824 {
43c8a043
EB
1825 if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
1826 && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
9b774782 1827 continue;
c66e8ae9 1828
43c8a043
EB
1829 if (MEM_READONLY_P (expr->expr)
1830 && !MEM_VOLATILE_P (expr->expr)
1831 && MEM_NOTRAP_P (expr->expr))
9b774782
JL
1832 /* Constant memory reference, e.g., a PIC address. */
1833 continue;
1834
1835 /* ??? Optimally, we would use interprocedural alias
1836 analysis to determine if this mem is actually killed
1837 by this call. */
1838
43c8a043 1839 SET_BIT (prune_exprs, expr->bitmap_index);
9b774782
JL
1840 }
1841 }
1842 }
c66e8ae9 1843
e0082a72 1844 FOR_EACH_BB (bb)
c66e8ae9 1845 {
b614171e 1846 edge e;
628f6a4e 1847 edge_iterator ei;
b614171e
MM
1848
1849 /* If the current block is the destination of an abnormal edge, we
9b774782
JL
1850 kill all trapping (for PRE) and memory (for hoist) expressions
1851 because we won't be able to properly place the instruction on
1852 the edge. So make them neither anticipatable nor transparent.
1853 This is fairly conservative.
1854
1855 ??? For hoisting it may be necessary to check for set-and-jump
1856 instructions here, not just for abnormal edges. The general problem
 1857 is that when an expression cannot be placed right at the end of
 1858 a basic block, we should account for any side-effects of subsequent
 1859 jump instructions that could clobber the expression. It would
1860 be best to implement this check along the lines of
1861 hoist_expr_reaches_here_p where the target block is already known
1862 and, hence, there's no need to conservatively prune expressions on
1863 "intermediate" set-and-jump instructions. */
628f6a4e 1864 FOR_EACH_EDGE (e, ei, bb->preds)
9b774782
JL
1865 if ((e->flags & EDGE_ABNORMAL)
1866 && (pre_p || CALL_P (BB_END (e->src))))
b614171e 1867 {
9b774782
JL
1868 sbitmap_difference (antloc[bb->index],
1869 antloc[bb->index], prune_exprs);
1870 sbitmap_difference (transp[bb->index],
1871 transp[bb->index], prune_exprs);
b614171e
MM
1872 break;
1873 }
9b774782
JL
1874 }
1875
1876 sbitmap_free (prune_exprs);
1877}
1878
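/* Concrete illustration (hypothetical): a division such as
   (div (reg 65) (reg 66)) may trap, so if a block has an incoming
   abnormal edge (e.g. from a nonlocal goto or an EH edge), the division
   is cleared from that block's ANTLOC and TRANSP sets above, and PRE
   will never try to place it on the abnormal edge. */
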
29fa95ed
JL
1879/* It may be necessary to insert a large number of insns on edges to
1880 make the existing occurrences of expressions fully redundant. This
1881 routine examines the set of insertions and deletions and if the ratio
1882 of insertions to deletions is too high for a particular expression, then
1883 the expression is removed from the insertion/deletion sets.
1884
1885 N_ELEMS is the number of elements in the hash table. */
1886
1887static void
1888prune_insertions_deletions (int n_elems)
1889{
1890 sbitmap_iterator sbi;
1891 sbitmap prune_exprs;
1892
1893 /* We always use I to iterate over blocks/edges and J to iterate over
1894 expressions. */
1895 unsigned int i, j;
1896
1897 /* Counts for the number of times an expression needs to be inserted and
1898 number of times an expression can be removed as a result. */
1899 int *insertions = GCNEWVEC (int, n_elems);
1900 int *deletions = GCNEWVEC (int, n_elems);
1901
1902 /* Set of expressions which require too many insertions relative to
1903 the number of deletions achieved. We will prune these out of the
1904 insertion/deletion sets. */
1905 prune_exprs = sbitmap_alloc (n_elems);
1906 sbitmap_zero (prune_exprs);
1907
1908 /* Iterate over the edges counting the number of times each expression
1909 needs to be inserted. */
1910 for (i = 0; i < (unsigned) n_edges; i++)
1911 {
1912 EXECUTE_IF_SET_IN_SBITMAP (pre_insert_map[i], 0, j, sbi)
1913 insertions[j]++;
1914 }
1915
1916 /* Similarly for deletions, but those occur in blocks rather than on
1917 edges. */
1918 for (i = 0; i < (unsigned) last_basic_block; i++)
1919 {
1920 EXECUTE_IF_SET_IN_SBITMAP (pre_delete_map[i], 0, j, sbi)
1921 deletions[j]++;
1922 }
1923
1924 /* Now that we have accurate counts, iterate over the elements in the
1925 hash table and see if any need too many insertions relative to the
1926 number of evaluations that can be removed. If so, mark them in
1927 PRUNE_EXPRS. */
1928 for (j = 0; j < (unsigned) n_elems; j++)
1929 if (deletions[j]
1930 && ((unsigned) insertions[j] / deletions[j]) > MAX_GCSE_INSERTION_RATIO)
1931 SET_BIT (prune_exprs, j);
1932
1933 /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS. */
1934 EXECUTE_IF_SET_IN_SBITMAP (prune_exprs, 0, j, sbi)
1935 {
1936 for (i = 0; i < (unsigned) n_edges; i++)
1937 RESET_BIT (pre_insert_map[i], j);
1938
1939 for (i = 0; i < (unsigned) last_basic_block; i++)
1940 RESET_BIT (pre_delete_map[i], j);
1941 }
1942
1943 sbitmap_free (prune_exprs);
1944 free (insertions);
1945 free (deletions);
1946}
1947
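/* Worked example (illustrative numbers): if expression J would have to
   be inserted on 14 edges in order to delete only 2 occurrences, and
   MAX_GCSE_INSERTION_RATIO were 4, then 14 / 2 = 7 exceeds the limit,
   so bit J is cleared from every pre_insert_map and pre_delete_map
   entry and the expression is left alone. */
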
9b774782 1948/* Top level routine to do the dataflow analysis needed by PRE. */
b614171e 1949
43c8a043 1950static struct edge_list *
9b774782
JL
1951compute_pre_data (void)
1952{
43c8a043 1953 struct edge_list *edge_list;
9b774782
JL
1954 basic_block bb;
1955
1956 compute_local_properties (transp, comp, antloc, &expr_hash_table);
1957 prune_expressions (true);
1958 sbitmap_vector_zero (ae_kill, last_basic_block);
1959
1960 /* Compute ae_kill for each basic block using:
1961
1962 ~(TRANSP | COMP)
1963 */
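 /* Spelled out (illustrative): an expression that is neither transparent
 in a block (an operand is modified there) nor computed in it has
 TRANSP = 0 and COMP = 0, so ~(0 | 0) = 1 and the block kills its
 availability; if either bit is set, the block does not kill it. */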
1964
1965 FOR_EACH_BB (bb)
1966 {
e0082a72
ZD
1967 sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
1968 sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
c66e8ae9
JL
1969 }
1970
10d22567 1971 edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
a42cd965 1972 ae_kill, &pre_insert_map, &pre_delete_map);
5a660bff 1973 sbitmap_vector_free (antloc);
bd3675fc 1974 antloc = NULL;
5a660bff 1975 sbitmap_vector_free (ae_kill);
589005ff 1976 ae_kill = NULL;
29fa95ed
JL
1977
1978 prune_insertions_deletions (expr_hash_table.n_elems);
43c8a043
EB
1979
1980 return edge_list;
7506f491
DE
1981}
1982\f
1983/* PRE utilities */
1984
cc2902df 1985/* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
a65f3558 1986 block BB.
7506f491
DE
1987
1988 VISITED is a pointer to a working buffer for tracking which BB's have
1989 been visited. It is NULL for the top-level call.
1990
1991 We treat reaching expressions that go through blocks containing the same
1992 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
1993 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
1994 2 as not reaching. The intent is to improve the probability of finding
1995 only one reaching expression and to reduce register lifetimes by picking
1996 the closest such expression. */
1997
1998static int
43c8a043
EB
1999pre_expr_reaches_here_p_work (basic_block occr_bb, struct expr *expr,
2000 basic_block bb, char *visited)
7506f491 2001{
36349f8b 2002 edge pred;
628f6a4e 2003 edge_iterator ei;
b8698a0f 2004
628f6a4e 2005 FOR_EACH_EDGE (pred, ei, bb->preds)
7506f491 2006 {
e2d2ed72 2007 basic_block pred_bb = pred->src;
7506f491 2008
36349f8b 2009 if (pred->src == ENTRY_BLOCK_PTR
7506f491 2010 /* Has this predecessor already been visited? */
0b17ab2f 2011 || visited[pred_bb->index])
c4c81601
RK
2012 ;/* Nothing to do. */
2013
7506f491 2014 /* Does this predecessor generate this expression? */
0b17ab2f 2015 else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index))
7506f491
DE
2016 {
2017 /* Is this the occurrence we're looking for?
2018 Note that there's only one generating occurrence per block
2019 so we just need to check the block number. */
a65f3558 2020 if (occr_bb == pred_bb)
7506f491 2021 return 1;
c4c81601 2022
0b17ab2f 2023 visited[pred_bb->index] = 1;
7506f491
DE
2024 }
2025 /* Ignore this predecessor if it kills the expression. */
0b17ab2f
RH
2026 else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index))
2027 visited[pred_bb->index] = 1;
c4c81601 2028
7506f491
DE
2029 /* Neither gen nor kill. */
2030 else
ac7c5af5 2031 {
0b17ab2f 2032 visited[pred_bb->index] = 1;
89e606c9 2033 if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
7506f491 2034 return 1;
ac7c5af5 2035 }
7506f491
DE
2036 }
2037
2038 /* All paths have been checked. */
2039 return 0;
2040}
283a2545
RL
2041
 2042/* The wrapper for pre_expr_reaches_here_p_work that ensures that any
dc297297 2043 memory allocated for that function is freed. */
283a2545
RL
2044
2045static int
1d088dee 2046pre_expr_reaches_here_p (basic_block occr_bb, struct expr *expr, basic_block bb)
283a2545
RL
2047{
2048 int rval;
5ed6ace5 2049 char *visited = XCNEWVEC (char, last_basic_block);
283a2545 2050
8e42ace1 2051 rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
283a2545
RL
2052
2053 free (visited);
c4c81601 2054 return rval;
283a2545 2055}
7506f491 2056\f
43c8a043 2057/* Generate RTL to copy an EXPR to its `reaching_reg' and return it. */
a42cd965
AM
2058
2059static rtx
1d088dee 2060process_insert_insn (struct expr *expr)
a42cd965
AM
2061{
2062 rtx reg = expr->reaching_reg;
43c8a043 2063 /* Copy the expression to make sure we don't have any sharing issues. */
fb0c0a12
RK
2064 rtx exp = copy_rtx (expr->expr);
2065 rtx pat;
a42cd965
AM
2066
2067 start_sequence ();
fb0c0a12
RK
2068
2069 /* If the expression is something that's an operand, like a constant,
2070 just copy it to a register. */
2071 if (general_operand (exp, GET_MODE (reg)))
2072 emit_move_insn (reg, exp);
2073
2074 /* Otherwise, make a new insn to compute this expression and make sure the
43c8a043 2075 insn will be recognized (this also adds any needed CLOBBERs). */
282899df
NS
2076 else
2077 {
2078 rtx insn = emit_insn (gen_rtx_SET (VOIDmode, reg, exp));
2079
57ac4c34 2080 if (insn_invalid_p (insn, false))
2f021b67 2081 gcc_unreachable ();
282899df 2082 }
b8698a0f 2083
2f937369 2084 pat = get_insns ();
a42cd965
AM
2085 end_sequence ();
2086
2087 return pat;
2088}
589005ff 2089
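/* Illustrative sketch (hypothetical RTL): for an expression
   (plus (reg 66) (const_int 4)) with reaching_reg (reg 70), the sequence
   built above is the single insn

     (set (reg 70) (plus (reg 66) (const_int 4)))

   (plus any CLOBBERs recognition requires), whereas a plain constant
   satisfies general_operand and becomes an ordinary move such as
   (set (reg 70) (const_int 4)). */
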
a65f3558
JL
2090/* Add EXPR to the end of basic block BB.
2091
eae7938e 2092 This is used by both PRE and code hoisting. */
7506f491
DE
2093
2094static void
eae7938e 2095insert_insn_end_basic_block (struct expr *expr, basic_block bb)
7506f491 2096{
a813c111 2097 rtx insn = BB_END (bb);
7506f491
DE
2098 rtx new_insn;
2099 rtx reg = expr->reaching_reg;
2100 int regno = REGNO (reg);
2f937369 2101 rtx pat, pat_end;
7506f491 2102
a42cd965 2103 pat = process_insert_insn (expr);
282899df 2104 gcc_assert (pat && INSN_P (pat));
2f937369
DM
2105
2106 pat_end = pat;
2107 while (NEXT_INSN (pat_end) != NULL_RTX)
2108 pat_end = NEXT_INSN (pat_end);
7506f491
DE
2109
 2110 /* If the last insn is a jump, insert EXPR in front [taking care to
4d6922ee 2111 handle cc0, etc. properly]. Similarly we need to take care of
068473ec 2112 trapping instructions in the presence of non-call exceptions. */
7506f491 2113
7b1b4aed 2114 if (JUMP_P (insn)
4b4bf941 2115 || (NONJUMP_INSN_P (insn)
c5cbcccf
ZD
2116 && (!single_succ_p (bb)
2117 || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
7506f491 2118 {
50b2596f 2119#ifdef HAVE_cc0
7506f491 2120 rtx note;
50b2596f 2121#endif
7506f491
DE
2122
2123 /* If this is a jump table, then we can't insert stuff here. Since
2124 we know the previous real insn must be the tablejump, we insert
2125 the new instruction just before the tablejump. */
2126 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
2127 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
03f43d3d 2128 insn = prev_active_insn (insn);
7506f491
DE
2129
2130#ifdef HAVE_cc0
 2131 /* FIXME: it would be nice to call prev_cc0_setter here but it aborts
2132 if cc0 isn't set. */
2133 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
2134 if (note)
2135 insn = XEXP (note, 0);
2136 else
2137 {
2138 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
2139 if (maybe_cc0_setter
2c3c49de 2140 && INSN_P (maybe_cc0_setter)
7506f491
DE
2141 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
2142 insn = maybe_cc0_setter;
2143 }
2144#endif
2145 /* FIXME: What if something in cc0/jump uses value set in new insn? */
6fb5fa3c 2146 new_insn = emit_insn_before_noloc (pat, insn, bb);
3947e2f9 2147 }
c4c81601 2148
3947e2f9
RH
2149 /* Likewise if the last insn is a call, as will happen in the presence
2150 of exception handling. */
7b1b4aed 2151 else if (CALL_P (insn)
c5cbcccf
ZD
2152 && (!single_succ_p (bb)
2153 || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
3947e2f9 2154 {
42db504c
SB
2155 /* Keeping in mind targets with small register classes and parameters
2156 in registers, we search backward and place the instructions before
 2157 the first parameter is loaded. Do this for all targets, for consistency
eae7938e 2158 and on the presumption that we'll get better code elsewhere as well. */
3947e2f9
RH
2159
2160 /* Since different machines initialize their parameter registers
2161 in different orders, assume nothing. Collect the set of all
2162 parameter registers. */
a813c111 2163 insn = find_first_parameter_load (insn, BB_HEAD (bb));
3947e2f9 2164
b1d26727
JL
2165 /* If we found all the parameter loads, then we want to insert
2166 before the first parameter load.
2167
2168 If we did not find all the parameter loads, then we might have
2169 stopped on the head of the block, which could be a CODE_LABEL.
2170 If we inserted before the CODE_LABEL, then we would be putting
2171 the insn in the wrong basic block. In that case, put the insn
b5229628 2172 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
7b1b4aed 2173 while (LABEL_P (insn)
589ca5cb 2174 || NOTE_INSN_BASIC_BLOCK_P (insn))
b5229628 2175 insn = NEXT_INSN (insn);
c4c81601 2176
6fb5fa3c 2177 new_insn = emit_insn_before_noloc (pat, insn, bb);
7506f491
DE
2178 }
2179 else
6fb5fa3c 2180 new_insn = emit_insn_after_noloc (pat, insn, bb);
7506f491 2181
2f937369 2182 while (1)
a65f3558 2183 {
2f937369 2184 if (INSN_P (pat))
4a81774c 2185 add_label_notes (PATTERN (pat), new_insn);
2f937369
DM
2186 if (pat == pat_end)
2187 break;
2188 pat = NEXT_INSN (pat);
a65f3558 2189 }
3947e2f9 2190
7506f491
DE
2191 gcse_create_count++;
2192
10d22567 2193 if (dump_file)
7506f491 2194 {
10d22567 2195 fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
0b17ab2f 2196 bb->index, INSN_UID (new_insn));
10d22567 2197 fprintf (dump_file, "copying expression %d to reg %d\n",
c4c81601 2198 expr->bitmap_index, regno);
7506f491
DE
2199 }
2200}
2201
a42cd965
AM
2202/* Insert partially redundant expressions on edges in the CFG to make
2203 the expressions fully redundant. */
7506f491 2204
a42cd965 2205static int
1d088dee 2206pre_edge_insert (struct edge_list *edge_list, struct expr **index_map)
7506f491 2207{
c4c81601 2208 int e, i, j, num_edges, set_size, did_insert = 0;
a65f3558
JL
2209 sbitmap *inserted;
2210
a42cd965
AM
2211 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
2212 if it reaches any of the deleted expressions. */
7506f491 2213
a42cd965
AM
2214 set_size = pre_insert_map[0]->size;
2215 num_edges = NUM_EDGES (edge_list);
02280659 2216 inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
a42cd965 2217 sbitmap_vector_zero (inserted, num_edges);
7506f491 2218
a42cd965 2219 for (e = 0; e < num_edges; e++)
7506f491
DE
2220 {
2221 int indx;
e2d2ed72 2222 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
a65f3558 2223
a65f3558 2224 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
7506f491 2225 {
a42cd965 2226 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
7506f491 2227
43c8a043
EB
2228 for (j = indx;
2229 insert && j < (int) expr_hash_table.n_elems;
2230 j++, insert >>= 1)
c4c81601
RK
2231 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
2232 {
2233 struct expr *expr = index_map[j];
2234 struct occr *occr;
a65f3558 2235
ff7cc307 2236 /* Now look at each deleted occurrence of this expression. */
c4c81601
RK
2237 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2238 {
2239 if (! occr->deleted_p)
2240 continue;
2241
3f117656 2242 /* Insert this expression on this edge if it would
ff7cc307 2243 reach the deleted occurrence in BB. */
c4c81601
RK
2244 if (!TEST_BIT (inserted[e], j))
2245 {
2246 rtx insn;
2247 edge eg = INDEX_EDGE (edge_list, e);
2248
2249 /* We can't insert anything on an abnormal and
2250 critical edge, so we insert the insn at the end of
2251 the previous block. There are several alternatives
 2252 detailed in Morgan's book, p. 277 (sec. 10.5), for
2253 handling this situation. This one is easiest for
2254 now. */
2255
b16aa8a5 2256 if (eg->flags & EDGE_ABNORMAL)
eae7938e 2257 insert_insn_end_basic_block (index_map[j], bb);
c4c81601
RK
2258 else
2259 {
2260 insn = process_insert_insn (index_map[j]);
2261 insert_insn_on_edge (insn, eg);
2262 }
2263
10d22567 2264 if (dump_file)
c4c81601 2265 {
5f39ad47 2266 fprintf (dump_file, "PRE: edge (%d,%d), ",
0b17ab2f
RH
2267 bb->index,
2268 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
10d22567 2269 fprintf (dump_file, "copy expression %d\n",
c4c81601
RK
2270 expr->bitmap_index);
2271 }
2272
a13d4ebf 2273 update_ld_motion_stores (expr);
c4c81601
RK
2274 SET_BIT (inserted[e], j);
2275 did_insert = 1;
2276 gcse_create_count++;
2277 }
2278 }
2279 }
7506f491
DE
2280 }
2281 }
5faf03ae 2282
5a660bff 2283 sbitmap_vector_free (inserted);
a42cd965 2284 return did_insert;
7506f491
DE
2285}
2286
073089a7 2287/* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
b885908b
MH
2288 Given "old_reg <- expr" (INSN), instead of adding after it
2289 reaching_reg <- old_reg
2290 it's better to do the following:
2291 reaching_reg <- expr
2292 old_reg <- reaching_reg
2293 because this way copy propagation can discover additional PRE
f5f2e3cd
MH
2294 opportunities. But if this fails, we try the old way.
2295 When "expr" is a store, i.e.
2296 given "MEM <- old_reg", instead of adding after it
2297 reaching_reg <- old_reg
2298 it's better to add it before as follows:
2299 reaching_reg <- old_reg
2300 MEM <- reaching_reg. */
7506f491
DE
2301
2302static void
1d088dee 2303pre_insert_copy_insn (struct expr *expr, rtx insn)
7506f491
DE
2304{
2305 rtx reg = expr->reaching_reg;
2306 int regno = REGNO (reg);
2307 int indx = expr->bitmap_index;
073089a7 2308 rtx pat = PATTERN (insn);
64068ca2 2309 rtx set, first_set, new_insn;
b885908b 2310 rtx old_reg;
073089a7 2311 int i;
7506f491 2312
073089a7 2313 /* This block matches the logic in hash_scan_insn. */
282899df 2314 switch (GET_CODE (pat))
073089a7 2315 {
282899df
NS
2316 case SET:
2317 set = pat;
2318 break;
2319
2320 case PARALLEL:
073089a7
RS
2321 /* Search through the parallel looking for the set whose
2322 source was the expression that we're interested in. */
64068ca2 2323 first_set = NULL_RTX;
073089a7
RS
2324 set = NULL_RTX;
2325 for (i = 0; i < XVECLEN (pat, 0); i++)
2326 {
2327 rtx x = XVECEXP (pat, 0, i);
64068ca2 2328 if (GET_CODE (x) == SET)
073089a7 2329 {
64068ca2
RS
2330 /* If the source was a REG_EQUAL or REG_EQUIV note, we
2331 may not find an equivalent expression, but in this
2332 case the PARALLEL will have a single set. */
2333 if (first_set == NULL_RTX)
2334 first_set = x;
2335 if (expr_equiv_p (SET_SRC (x), expr->expr))
2336 {
2337 set = x;
2338 break;
2339 }
073089a7
RS
2340 }
2341 }
64068ca2
RS
2342
2343 gcc_assert (first_set);
2344 if (set == NULL_RTX)
2345 set = first_set;
282899df
NS
2346 break;
2347
2348 default:
2349 gcc_unreachable ();
073089a7 2350 }
c4c81601 2351
7b1b4aed 2352 if (REG_P (SET_DEST (set)))
073089a7 2353 {
f5f2e3cd
MH
2354 old_reg = SET_DEST (set);
2355 /* Check if we can modify the set destination in the original insn. */
2356 if (validate_change (insn, &SET_DEST (set), reg, 0))
2357 {
2358 new_insn = gen_move_insn (old_reg, reg);
2359 new_insn = emit_insn_after (new_insn, insn);
f5f2e3cd
MH
2360 }
2361 else
2362 {
2363 new_insn = gen_move_insn (reg, old_reg);
2364 new_insn = emit_insn_after (new_insn, insn);
f5f2e3cd 2365 }
073089a7 2366 }
f5f2e3cd 2367 else /* This is possible only in case of a store to memory. */
073089a7 2368 {
f5f2e3cd 2369 old_reg = SET_SRC (set);
073089a7 2370 new_insn = gen_move_insn (reg, old_reg);
f5f2e3cd
MH
2371
2372 /* Check if we can modify the set source in the original insn. */
2373 if (validate_change (insn, &SET_SRC (set), reg, 0))
2374 new_insn = emit_insn_before (new_insn, insn);
2375 else
2376 new_insn = emit_insn_after (new_insn, insn);
073089a7 2377 }
7506f491
DE
2378
2379 gcse_create_count++;
2380
10d22567
ZD
2381 if (dump_file)
2382 fprintf (dump_file,
a42cd965 2383 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
b0de17ef 2384 BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
a42cd965 2385 INSN_UID (insn), regno);
7506f491
DE
2386}
2387
2388/* Copy available expressions that reach the redundant expression
2389 to `reaching_reg'. */
2390
2391static void
1d088dee 2392pre_insert_copies (void)
7506f491 2393{
f5f2e3cd 2394 unsigned int i, added_copy;
c4c81601
RK
2395 struct expr *expr;
2396 struct occr *occr;
2397 struct occr *avail;
a65f3558 2398
7506f491
DE
2399 /* For each available expression in the table, copy the result to
2400 `reaching_reg' if the expression reaches a deleted one.
2401
2402 ??? The current algorithm is rather brute force.
2403 Need to do some profiling. */
2404
02280659 2405 for (i = 0; i < expr_hash_table.size; i++)
43c8a043 2406 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
c4c81601
RK
2407 {
2408 /* If the basic block isn't reachable, PPOUT will be TRUE. However,
2409 we don't want to insert a copy here because the expression may not
2410 really be redundant. So only insert an insn if the expression was
2411 deleted. This test also avoids further processing if the
2412 expression wasn't deleted anywhere. */
2413 if (expr->reaching_reg == NULL)
2414 continue;
7b1b4aed 2415
f5f2e3cd 2416 /* Set when we add a copy for that expression. */
7b1b4aed 2417 added_copy = 0;
c4c81601
RK
2418
2419 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2420 {
2421 if (! occr->deleted_p)
2422 continue;
7506f491 2423
c4c81601
RK
2424 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
2425 {
2426 rtx insn = avail->insn;
7506f491 2427
c4c81601
RK
2428 /* No need to handle this one if handled already. */
2429 if (avail->copied_p)
2430 continue;
7506f491 2431
c4c81601 2432 /* Don't handle this one if it's a redundant one. */
4a81774c 2433 if (INSN_DELETED_P (insn))
c4c81601 2434 continue;
7506f491 2435
c4c81601 2436 /* Or if the expression doesn't reach the deleted one. */
589005ff 2437 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
e2d2ed72
AM
2438 expr,
2439 BLOCK_FOR_INSN (occr->insn)))
c4c81601 2440 continue;
7506f491 2441
f5f2e3cd
MH
2442 added_copy = 1;
2443
c4c81601
RK
2444 /* Copy the result of avail to reaching_reg. */
2445 pre_insert_copy_insn (expr, insn);
2446 avail->copied_p = 1;
2447 }
2448 }
f5f2e3cd 2449
7b1b4aed 2450 if (added_copy)
f5f2e3cd 2451 update_ld_motion_stores (expr);
c4c81601 2452 }
7506f491
DE
2453}
2454
10d1bb36
JH
2455/* Emit move from SRC to DEST noting the equivalence with expression computed
2456 in INSN. */
43c8a043 2457
10d1bb36 2458static rtx
43c8a043 2459gcse_emit_move_after (rtx dest, rtx src, rtx insn)
10d1bb36 2460{
60564289 2461 rtx new_rtx;
6bdb8dd6 2462 rtx set = single_set (insn), set2;
10d1bb36
JH
2463 rtx note;
2464 rtx eqv;
2465
2466 /* This should never fail since we're creating a reg->reg copy
2467 we've verified to be valid. */
2468
60564289 2469 new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
285464d0 2470
10d1bb36 2471 /* Note the equivalence for local CSE pass. */
60564289 2472 set2 = single_set (new_rtx);
6bdb8dd6 2473 if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
60564289 2474 return new_rtx;
10d1bb36
JH
2475 if ((note = find_reg_equal_equiv_note (insn)))
2476 eqv = XEXP (note, 0);
2477 else
2478 eqv = SET_SRC (set);
2479
60564289 2480 set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
10d1bb36 2481
60564289 2482 return new_rtx;
10d1bb36
JH
2483}
2484
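/* Illustrative example (hypothetical RTL): if INSN is
     (insn 42 ... (set (reg 66) (plus (reg 67) (const_int 8))))
   then gcse_emit_move_after ((reg 66), (reg 70), insn) emits
     (set (reg 66) (reg 70))
   after insn 42 and tags it with a REG_EQUAL note for
   (plus (reg 67) (const_int 8)), so later CSE and copy propagation can
   still see the original value. */
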
7506f491 2485/* Delete redundant computations.
7506f491
DE
2486 Deletion is done by changing the insn to copy the `reaching_reg' of
2487 the expression into the result of the SET. It is left to later passes
2488 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
2489
43c8a043 2490 Return nonzero if a change is made. */
7506f491
DE
2491
2492static int
1d088dee 2493pre_delete (void)
7506f491 2494{
2e653e39 2495 unsigned int i;
63bc1d05 2496 int changed;
c4c81601
RK
2497 struct expr *expr;
2498 struct occr *occr;
a65f3558 2499
7506f491 2500 changed = 0;
02280659 2501 for (i = 0; i < expr_hash_table.size; i++)
43c8a043 2502 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
c4c81601
RK
2503 {
2504 int indx = expr->bitmap_index;
7506f491 2505
43c8a043 2506 /* We only need to search antic_occr since we require ANTLOC != 0. */
c4c81601
RK
2507 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2508 {
2509 rtx insn = occr->insn;
2510 rtx set;
e2d2ed72 2511 basic_block bb = BLOCK_FOR_INSN (insn);
7506f491 2512
073089a7
RS
2513 /* We only delete insns that have a single_set. */
2514 if (TEST_BIT (pre_delete_map[bb->index], indx)
6fb5fa3c
DB
2515 && (set = single_set (insn)) != 0
2516 && dbg_cnt (pre_insn))
c4c81601 2517 {
c4c81601
RK
2518 /* Create a pseudo-reg to store the result of reaching
2519 expressions into. Get the mode for the new pseudo from
2520 the mode of the original destination pseudo. */
2521 if (expr->reaching_reg == NULL)
46b71b03 2522 expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
c4c81601 2523
43c8a043 2524 gcse_emit_move_after (SET_DEST (set), expr->reaching_reg, insn);
10d1bb36
JH
2525 delete_insn (insn);
2526 occr->deleted_p = 1;
10d1bb36
JH
2527 changed = 1;
2528 gcse_subst_count++;
7506f491 2529
10d22567 2530 if (dump_file)
c4c81601 2531 {
10d22567 2532 fprintf (dump_file,
c4c81601
RK
2533 "PRE: redundant insn %d (expression %d) in ",
2534 INSN_UID (insn), indx);
10d22567 2535 fprintf (dump_file, "bb %d, reaching reg is %d\n",
0b17ab2f 2536 bb->index, REGNO (expr->reaching_reg));
c4c81601
RK
2537 }
2538 }
2539 }
2540 }
7506f491
DE
2541
2542 return changed;
2543}
2544
2545/* Perform GCSE optimizations using PRE.
2546 This is called by one_pre_gcse_pass after all the dataflow analysis
2547 has been done.
2548
c4c81601
RK
 2549 This is based on the original Morel-Renvoise paper, Fred Chow's thesis, and
2550 lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
2551 Compiler Design and Implementation.
7506f491 2552
c4c81601
RK
2553 ??? A new pseudo reg is created to hold the reaching expression. The nice
2554 thing about the classical approach is that it would try to use an existing
2555 reg. If the register can't be adequately optimized [i.e. we introduce
2556 reload problems], one could add a pass here to propagate the new register
2557 through the block.
7506f491 2558
c4c81601
RK
2559 ??? We don't handle single sets in PARALLELs because we're [currently] not
2560 able to copy the rest of the parallel when we insert copies to create full
2561 redundancies from partial redundancies. However, there's no reason why we
2562 can't handle PARALLELs in the cases where there are no partial
7506f491
DE
2563 redundancies. */
2564
2565static int
43c8a043 2566pre_gcse (struct edge_list *edge_list)
7506f491 2567{
2e653e39
RK
2568 unsigned int i;
2569 int did_insert, changed;
7506f491 2570 struct expr **index_map;
c4c81601 2571 struct expr *expr;
7506f491
DE
2572
2573 /* Compute a mapping from expression number (`bitmap_index') to
2574 hash table entry. */
2575
5ed6ace5 2576 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
02280659 2577 for (i = 0; i < expr_hash_table.size; i++)
43c8a043 2578 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
c4c81601 2579 index_map[expr->bitmap_index] = expr;
7506f491 2580
7506f491
DE
2581 /* Delete the redundant insns first so that
2582 - we know what register to use for the new insns and for the other
2583 ones with reaching expressions
2584 - we know which insns are redundant when we go to create copies */
c4c81601 2585
7506f491 2586 changed = pre_delete ();
a42cd965 2587 did_insert = pre_edge_insert (edge_list, index_map);
c4c81601 2588
7506f491 2589 /* In other places with reaching expressions, copy the expression to the
a42cd965 2590 specially allocated pseudo-reg that reaches the redundant expr. */
7506f491 2591 pre_insert_copies ();
a42cd965
AM
2592 if (did_insert)
2593 {
2594 commit_edge_insertions ();
2595 changed = 1;
2596 }
7506f491 2597
283a2545 2598 free (index_map);
7506f491
DE
2599 return changed;
2600}
2601
2602/* Top level routine to perform one PRE GCSE pass.
2603
cc2902df 2604 Return nonzero if a change was made. */
7506f491
DE
2605
2606static int
5f39ad47 2607one_pre_gcse_pass (void)
7506f491
DE
2608{
2609 int changed = 0;
2610
2611 gcse_subst_count = 0;
2612 gcse_create_count = 0;
2613
5f39ad47
SB
2614 /* Return if there's nothing to do, or it is too expensive. */
2615 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
2616 || is_too_expensive (_("PRE disabled")))
2617 return 0;
2618
2619 /* We need alias. */
2620 init_alias_analysis ();
2621
2622 bytes_used = 0;
2623 gcc_obstack_init (&gcse_obstack);
2624 alloc_gcse_mem ();
2625
e45425ec 2626 alloc_hash_table (&expr_hash_table);
a42cd965 2627 add_noreturn_fake_exit_edges ();
a13d4ebf
AM
2628 if (flag_gcse_lm)
2629 compute_ld_motion_mems ();
2630
02280659 2631 compute_hash_table (&expr_hash_table);
43c8a043
EB
2632 if (flag_gcse_lm)
2633 trim_ld_motion_mems ();
10d22567
ZD
2634 if (dump_file)
2635 dump_hash_table (dump_file, "Expression", &expr_hash_table);
c4c81601 2636
02280659 2637 if (expr_hash_table.n_elems > 0)
7506f491 2638 {
43c8a043 2639 struct edge_list *edge_list;
02280659 2640 alloc_pre_mem (last_basic_block, expr_hash_table.n_elems);
43c8a043
EB
2641 edge_list = compute_pre_data ();
2642 changed |= pre_gcse (edge_list);
a42cd965 2643 free_edge_list (edge_list);
7506f491
DE
2644 free_pre_mem ();
2645 }
c4c81601 2646
43c8a043
EB
2647 if (flag_gcse_lm)
2648 free_ld_motion_mems ();
6809cbf9 2649 remove_fake_exit_edges ();
02280659 2650 free_hash_table (&expr_hash_table);
7506f491 2651
5f39ad47
SB
2652 free_gcse_mem ();
2653 obstack_free (&gcse_obstack, NULL);
2654
2655 /* We are finished with alias. */
2656 end_alias_analysis ();
2657
10d22567 2658 if (dump_file)
7506f491 2659 {
5f39ad47
SB
2660 fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
2661 current_function_name (), n_basic_blocks, bytes_used);
10d22567 2662 fprintf (dump_file, "%d substs, %d insns created\n",
c4c81601 2663 gcse_subst_count, gcse_create_count);
7506f491
DE
2664 }
2665
2666 return changed;
2667}
aeb2f500 2668\f
cf7c4aa6
HPN
2669/* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
2670 to INSN. If such notes are added to an insn which references a
2671 CODE_LABEL, the LABEL_NUSES count is incremented. We have to add
2672 that note, because the following loop optimization pass requires
2673 them. */
aeb2f500 2674
aeb2f500
JW
2675/* ??? If there was a jump optimization pass after gcse and before loop,
2676 then we would not need to do this here, because jump would add the
cf7c4aa6 2677 necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */
aeb2f500
JW
2678
2679static void
1d088dee 2680add_label_notes (rtx x, rtx insn)
aeb2f500
JW
2681{
2682 enum rtx_code code = GET_CODE (x);
2683 int i, j;
6f7d635c 2684 const char *fmt;
aeb2f500
JW
2685
2686 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2687 {
6b3603c2 2688 /* This code used to ignore labels that referred to dispatch tables to
e0bb17a8 2689 avoid flow generating (slightly) worse code.
6b3603c2 2690
ac7c5af5
JL
2691 We no longer ignore such label references (see LABEL_REF handling in
2692 mark_jump_label for additional information). */
c4c81601 2693
cb2f563b
HPN
2694 /* There's no reason for current users to emit jump-insns with
2695 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
2696 notes. */
2697 gcc_assert (!JUMP_P (insn));
65c5f2a6
ILT
2698 add_reg_note (insn, REG_LABEL_OPERAND, XEXP (x, 0));
2699
cb2f563b
HPN
2700 if (LABEL_P (XEXP (x, 0)))
2701 LABEL_NUSES (XEXP (x, 0))++;
2702
aeb2f500
JW
2703 return;
2704 }
2705
c4c81601 2706 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
aeb2f500
JW
2707 {
2708 if (fmt[i] == 'e')
2709 add_label_notes (XEXP (x, i), insn);
2710 else if (fmt[i] == 'E')
2711 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2712 add_label_notes (XVECEXP (x, i, j), insn);
2713 }
2714}
a65f3558 2715
bb457bd9
JL
2716/* Code Hoisting variables and subroutines. */
2717
2718/* Very busy expressions. */
2719static sbitmap *hoist_vbein;
2720static sbitmap *hoist_vbeout;
2721
bb457bd9 2722/* ??? We could compute post dominators and run this algorithm in
68e82b83 2723 reverse to perform tail merging; doing so would probably be
bb457bd9
JL
2724 more effective than the tail merging code in jump.c.
2725
2726 It's unclear if tail merging could be run in parallel with
2727 code hoisting. It would be nice. */
2728
2729/* Allocate vars used for code hoisting analysis. */
2730
2731static void
1d088dee 2732alloc_code_hoist_mem (int n_blocks, int n_exprs)
bb457bd9
JL
2733{
2734 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
2735 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
2736 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
2737
2738 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
2739 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
bb457bd9
JL
2740}
2741
2742/* Free vars used for code hoisting analysis. */
2743
2744static void
1d088dee 2745free_code_hoist_mem (void)
bb457bd9 2746{
5a660bff
DB
2747 sbitmap_vector_free (antloc);
2748 sbitmap_vector_free (transp);
2749 sbitmap_vector_free (comp);
bb457bd9 2750
5a660bff
DB
2751 sbitmap_vector_free (hoist_vbein);
2752 sbitmap_vector_free (hoist_vbeout);
bb457bd9 2753
d47cc544 2754 free_dominance_info (CDI_DOMINATORS);
bb457bd9
JL
2755}
2756
2757/* Compute the very busy expressions at entry/exit from each block.
2758
2759 An expression is very busy if all paths from a given point
2760 compute the expression. */
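/* In equation form (matching the implementation below), with S ranging
   over the successors of BB:

     VBEout(BB) = (intersection over S of VBEin(S)) | COMP(BB)
     VBEin(BB)  = ANTLOC(BB) | (VBEout(BB) & TRANSP(BB))

   iterated to a fixed point over the blocks in reverse order. */
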
2761
2762static void
1d088dee 2763compute_code_hoist_vbeinout (void)
bb457bd9 2764{
e0082a72
ZD
2765 int changed, passes;
2766 basic_block bb;
bb457bd9 2767
d55bc081
ZD
2768 sbitmap_vector_zero (hoist_vbeout, last_basic_block);
2769 sbitmap_vector_zero (hoist_vbein, last_basic_block);
bb457bd9
JL
2770
2771 passes = 0;
2772 changed = 1;
c4c81601 2773
bb457bd9
JL
2774 while (changed)
2775 {
2776 changed = 0;
c4c81601 2777
bb457bd9
JL
 2778 /* We scan the blocks in reverse order to speed up
2779 the convergence. */
e0082a72 2780 FOR_EACH_BB_REVERSE (bb)
bb457bd9 2781 {
e0082a72 2782 if (bb->next_bb != EXIT_BLOCK_PTR)
ce4c0015
MK
2783 {
2784 sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
3c2c4f22 2785 hoist_vbein, bb);
ce4c0015
MK
2786
2787 /* Include expressions in VBEout that are calculated
2788 in BB and available at its end. */
2789 sbitmap_a_or_b (hoist_vbeout[bb->index],
2790 hoist_vbeout[bb->index], comp[bb->index]);
2791 }
f8423fea
SB
2792
2793 changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
2794 antloc[bb->index],
2795 hoist_vbeout[bb->index],
2796 transp[bb->index]);
bb457bd9 2797 }
c4c81601 2798
bb457bd9
JL
2799 passes++;
2800 }
2801
10d22567 2802 if (dump_file)
cad9aa15
MK
2803 {
2804 fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
2805
2806 FOR_EACH_BB (bb)
2807 {
2808 fprintf (dump_file, "vbein (%d): ", bb->index);
2809 dump_sbitmap_file (dump_file, hoist_vbein[bb->index]);
2810 fprintf (dump_file, "vbeout(%d): ", bb->index);
2811 dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
2812 }
2813 }
bb457bd9
JL
2814}
2815
2816/* Top level routine to do the dataflow analysis needed by code hoisting. */
2817
2818static void
1d088dee 2819compute_code_hoist_data (void)
bb457bd9 2820{
02280659 2821 compute_local_properties (transp, comp, antloc, &expr_hash_table);
9b774782 2822 prune_expressions (false);
bb457bd9 2823 compute_code_hoist_vbeinout ();
d47cc544 2824 calculate_dominance_info (CDI_DOMINATORS);
10d22567
ZD
2825 if (dump_file)
2826 fprintf (dump_file, "\n");
bb457bd9
JL
2827}
2828
2829/* Determine if the expression identified by EXPR_INDEX would
 2830 reach BB unimpaired if it was placed at the end of EXPR_BB.
20160347
MK
2831 Stop the search if the expression would need to be moved more
2832 than DISTANCE instructions.
bb457bd9
JL
2833
 2834 It's unclear exactly what Muchnick meant by "unimpaired". It seems
2835 to me that the expression must either be computed or transparent in
2836 *every* block in the path(s) from EXPR_BB to BB. Any other definition
2837 would allow the expression to be hoisted out of loops, even if
2838 the expression wasn't a loop invariant.
2839
2840 Contrast this to reachability for PRE where an expression is
2841 considered reachable if *any* path reaches instead of *all*
2842 paths. */
2843
2844static int
20160347
MK
2845hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
2846 char *visited, int distance, int *bb_size)
bb457bd9
JL
2847{
2848 edge pred;
628f6a4e 2849 edge_iterator ei;
283a2545 2850 int visited_allocated_locally = 0;
589005ff 2851
20160347
MK
2852 /* Terminate the search if distance, for which EXPR is allowed to move,
2853 is exhausted. */
2854 if (distance > 0)
2855 {
2856 distance -= bb_size[bb->index];
2857
2858 if (distance <= 0)
2859 return 0;
2860 }
2861 else
2862 gcc_assert (distance == 0);
bb457bd9
JL
2863
2864 if (visited == NULL)
2865 {
8e42ace1 2866 visited_allocated_locally = 1;
5ed6ace5 2867 visited = XCNEWVEC (char, last_basic_block);
bb457bd9
JL
2868 }
2869
628f6a4e 2870 FOR_EACH_EDGE (pred, ei, bb->preds)
bb457bd9 2871 {
e2d2ed72 2872 basic_block pred_bb = pred->src;
bb457bd9
JL
2873
2874 if (pred->src == ENTRY_BLOCK_PTR)
2875 break;
f305679f
JH
2876 else if (pred_bb == expr_bb)
2877 continue;
0b17ab2f 2878 else if (visited[pred_bb->index])
bb457bd9 2879 continue;
c4c81601 2880
0b17ab2f 2881 else if (! TEST_BIT (transp[pred_bb->index], expr_index))
bb457bd9 2882 break;
c4c81601 2883
bb457bd9
JL
2884 /* Not killed. */
2885 else
2886 {
0b17ab2f 2887 visited[pred_bb->index] = 1;
20160347
MK
2888 if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb,
2889 visited, distance, bb_size))
bb457bd9
JL
2890 break;
2891 }
2892 }
589005ff 2893 if (visited_allocated_locally)
283a2545 2894 free (visited);
c4c81601 2895
bb457bd9
JL
2896 return (pred == NULL);
2897}
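/* Worked distance example (illustrative numbers): with DISTANCE = 10 and
   intermediate blocks of 4 and 7 insns on the path, the first recursive
   step leaves 10 - 4 = 6, and the next would leave 6 - 7 <= 0, so the
   search stops and the expression is not considered to reach that far.
   A DISTANCE of 0 means the expression may travel arbitrarily far. */
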
2898\f
073a8998 2899/* Find occurrence in BB. */
43c8a043 2900
20160347
MK
2901static struct occr *
2902find_occr_in_bb (struct occr *occr, basic_block bb)
2903{
2904 /* Find the right occurrence of this expression. */
2905 while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
2906 occr = occr->next;
2907
2908 return occr;
2909}
2910
bb457bd9 2911/* Actually perform code hoisting. */
c4c81601 2912
5f39ad47 2913static int
1d088dee 2914hoist_code (void)
bb457bd9 2915{
e0082a72 2916 basic_block bb, dominated;
cad9aa15
MK
2917 VEC (basic_block, heap) *dom_tree_walk;
2918 unsigned int dom_tree_walk_index;
66f97d31 2919 VEC (basic_block, heap) *domby;
c635a1ec 2920 unsigned int i,j;
bb457bd9 2921 struct expr **index_map;
c4c81601 2922 struct expr *expr;
20160347
MK
2923 int *to_bb_head;
2924 int *bb_size;
5f39ad47 2925 int changed = 0;
bb457bd9 2926
bb457bd9
JL
2927 /* Compute a mapping from expression number (`bitmap_index') to
2928 hash table entry. */
2929
5ed6ace5 2930 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
02280659 2931 for (i = 0; i < expr_hash_table.size; i++)
43c8a043 2932 for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
c4c81601 2933 index_map[expr->bitmap_index] = expr;
bb457bd9 2934
20160347
MK
2935 /* Calculate sizes of basic blocks and note how far
2936 each instruction is from the start of its block. We then use this
 2937 data to restrict the distance an expression can travel. */
2938
2939 to_bb_head = XCNEWVEC (int, get_max_uid ());
2940 bb_size = XCNEWVEC (int, last_basic_block);
2941
2942 FOR_EACH_BB (bb)
2943 {
2944 rtx insn;
20160347
MK
2945 int to_head;
2946
20160347 2947 to_head = 0;
05b5ea34 2948 FOR_BB_INSNS (bb, insn)
20160347
MK
2949 {
2950 /* Don't count debug instructions to avoid them affecting
2951 decision choices. */
2952 if (NONDEBUG_INSN_P (insn))
2953 to_bb_head[INSN_UID (insn)] = to_head++;
20160347
MK
2954 }
2955
2956 bb_size[bb->index] = to_head;
2957 }
2958
cad9aa15
MK
2959 gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1
2960 && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest
2961 == ENTRY_BLOCK_PTR->next_bb));
2962
2963 dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS,
2964 ENTRY_BLOCK_PTR->next_bb);
2965
bb457bd9
JL
2966 /* Walk over each basic block looking for potentially hoistable
 2967 expressions; nothing gets hoisted from the entry block. */
ac47786e 2968 FOR_EACH_VEC_ELT (basic_block, dom_tree_walk, dom_tree_walk_index, bb)
bb457bd9 2969 {
cad9aa15
MK
2970 domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH);
2971
2972 if (VEC_length (basic_block, domby) == 0)
2973 continue;
bb457bd9
JL
2974
2975 /* Examine each expression that is very busy at the exit of this
2976 block. These are the potentially hoistable expressions. */
5829cc0f 2977 for (i = 0; i < SBITMAP_SIZE (hoist_vbeout[bb->index]); i++)
bb457bd9 2978 {
9b774782 2979 if (TEST_BIT (hoist_vbeout[bb->index], i))
bb457bd9 2980 {
cad9aa15
MK
2981 /* Current expression. */
2982 struct expr *expr = index_map[i];
073a8998 2983 /* Number of occurrences of EXPR that can be hoisted to BB. */
cad9aa15 2984 int hoistable = 0;
073a8998 2985 /* Basic blocks that have occurrences reachable from BB. */
cad9aa15 2986 bitmap_head _from_bbs, *from_bbs = &_from_bbs;
073a8998 2987 /* Occurrences reachable from BB. */
cad9aa15
MK
2988 VEC (occr_t, heap) *occrs_to_hoist = NULL;
2989 /* We want to insert the expression into BB only once, so
2990 note when we've inserted it. */
2991 int insn_inserted_p;
2992 occr_t occr;
2993
2994 bitmap_initialize (from_bbs, 0);
2995
ce4c0015 2996 /* If an expression is computed in BB and is available at end of
073a8998 2997 BB, hoist all occurrences dominated by BB to BB. */
ce4c0015 2998 if (TEST_BIT (comp[bb->index], i))
cad9aa15
MK
2999 {
3000 occr = find_occr_in_bb (expr->antic_occr, bb);
3001
3002 if (occr)
3003 {
073a8998 3004 /* An occurrence might already have been deleted
cad9aa15 3005 while processing a dominator of BB. */
2d36b47f 3006 if (!occr->deleted_p)
cad9aa15
MK
3007 {
3008 gcc_assert (NONDEBUG_INSN_P (occr->insn));
3009 hoistable++;
3010 }
3011 }
3012 else
3013 hoistable++;
3014 }
ce4c0015 3015
bb457bd9
JL
 3016 /* We've found a potentially hoistable expression; now
3017 we look at every block BB dominates to see if it
3018 computes the expression. */
ac47786e 3019 FOR_EACH_VEC_ELT (basic_block, domby, j, dominated)
bb457bd9 3020 {
20160347
MK
3021 int max_distance;
3022
bb457bd9 3023 /* Ignore self dominance. */
c635a1ec 3024 if (bb == dominated)
bb457bd9 3025 continue;
bb457bd9
JL
 3026 /* We've found a dominated block; now see if it computes
3027 the busy expression and whether or not moving that
3028 expression to the "beginning" of that block is safe. */
e0082a72 3029 if (!TEST_BIT (antloc[dominated->index], i))
bb457bd9
JL
3030 continue;
3031
cad9aa15
MK
3032 occr = find_occr_in_bb (expr->antic_occr, dominated);
3033 gcc_assert (occr);
20160347 3034
073a8998 3035 /* An occurrence might already have been deleted
cad9aa15
MK
3036 while processing a dominator of BB. */
3037 if (occr->deleted_p)
2d36b47f 3038 continue;
cad9aa15
MK
3039 gcc_assert (NONDEBUG_INSN_P (occr->insn));
3040
3041 max_distance = expr->max_distance;
3042 if (max_distance > 0)
3043 /* Adjust MAX_DISTANCE to account for the fact that
3044 OCCR won't have to travel all of DOMINATED, but
3045 only part of it. */
3046 max_distance += (bb_size[dominated->index]
3047 - to_bb_head[INSN_UID (occr->insn)]);
20160347 3048
bb457bd9 3049 /* Note if the expression would reach the dominated block
589005ff 3050 unimpaired if it was placed at the end of BB.
bb457bd9
JL
3051
3052 Keep track of how many times this expression is hoistable
3053 from a dominated block into BB. */
20160347
MK
3054 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL,
3055 max_distance, bb_size))
cad9aa15
MK
3056 {
3057 hoistable++;
3058 VEC_safe_push (occr_t, heap,
3059 occrs_to_hoist, occr);
3060 bitmap_set_bit (from_bbs, dominated->index);
3061 }
bb457bd9
JL
3062 }

              /* If we found more than one hoistable occurrence of this
                 expression, then note it in the vector of expressions to
                 hoist.  It makes no sense to hoist things which are computed
                 in only one BB, and doing so tends to pessimize register
                 allocation.  One could increase this value to try harder
                 to avoid any possible code expansion due to register
                 allocation issues; however, experiments have shown that
                 the vast majority of hoistable expressions are only movable
                 from two successors, so raising this threshold is likely
                 to nullify any benefit we get from code hoisting.  */
              if (hoistable > 1 && dbg_cnt (hoist_insn))
                {
                  /* If (hoistable != VEC_length), then there is
                     an occurrence of EXPR in BB itself.  Don't waste
                     time looking for the LCA in this case.  */
                  if ((unsigned) hoistable
                      == VEC_length (occr_t, occrs_to_hoist))
                    {
                      basic_block lca;

                      lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
                                                              from_bbs);
                      if (lca != bb)
                        /* Punt, it's better to hoist these occurrences to
                           the LCA.  */
                        VEC_free (occr_t, heap, occrs_to_hoist);
                    }
                }
              else
                /* Punt, no point hoisting a single occurrence.  */
                VEC_free (occr_t, heap, occrs_to_hoist);

              insn_inserted_p = 0;

              /* Walk through the occurrences of the I'th expression we want
                 to hoist to BB and make the transformations.  */
              FOR_EACH_VEC_ELT (occr_t, occrs_to_hoist, j, occr)
                {
                  rtx insn;
                  rtx set;

                  gcc_assert (!occr->deleted_p);

                  insn = occr->insn;
                  set = single_set (insn);
                  gcc_assert (set);

                  /* Create a pseudo-reg to store the result of reaching
                     expressions into.  Get the mode for the new pseudo
                     from the mode of the original destination pseudo.

                     It is important to use new pseudos whenever we
                     emit a set.  This will allow reload to use
                     rematerialization for such registers.  */
                  if (!insn_inserted_p)
                    expr->reaching_reg
                      = gen_reg_rtx_and_attrs (SET_DEST (set));

                  gcse_emit_move_after (SET_DEST (set), expr->reaching_reg,
                                        insn);
                  delete_insn (insn);
                  occr->deleted_p = 1;
                  changed = 1;
                  gcse_subst_count++;

                  if (!insn_inserted_p)
                    {
                      insert_insn_end_basic_block (expr, bb);
                      insn_inserted_p = 1;
                    }
                }

              VEC_free (occr_t, heap, occrs_to_hoist);
              bitmap_clear (from_bbs);
            }
        }
      VEC_free (basic_block, heap, domby);
    }

  VEC_free (basic_block, heap, dom_tree_walk);
  free (bb_size);
  free (to_bb_head);
  free (index_map);

  return changed;
}

/* Top level routine to perform one code hoisting (aka unification) pass.

   Return nonzero if a change was made.  */

static int
one_code_hoisting_pass (void)
{
  int changed = 0;

  gcse_subst_count = 0;
  gcse_create_count = 0;

  /* Return if there's nothing to do, or it is too expensive.  */
  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
      || is_too_expensive (_("GCSE disabled")))
    return 0;

  doing_code_hoisting_p = true;

  /* We need alias analysis.  */
  init_alias_analysis ();

  bytes_used = 0;
  gcc_obstack_init (&gcse_obstack);
  alloc_gcse_mem ();

  alloc_hash_table (&expr_hash_table);
  compute_hash_table (&expr_hash_table);
  if (dump_file)
    dump_hash_table (dump_file, "Code Hoisting Expressions", &expr_hash_table);

  if (expr_hash_table.n_elems > 0)
    {
      alloc_code_hoist_mem (last_basic_block, expr_hash_table.n_elems);
      compute_code_hoist_data ();
      changed = hoist_code ();
      free_code_hoist_mem ();
    }

  free_hash_table (&expr_hash_table);
  free_gcse_mem ();
  obstack_free (&gcse_obstack, NULL);

  /* We are finished with alias analysis.  */
  end_alias_analysis ();

  if (dump_file)
    {
      fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
               current_function_name (), n_basic_blocks, bytes_used);
      fprintf (dump_file, "%d substs, %d insns created\n",
               gcse_subst_count, gcse_create_count);
    }

  doing_code_hoisting_p = false;

  return changed;
}
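
/* For illustration only (this sketch is not part of the original source):
   given

       if (c)
         x = a + b;
       else
         y = a + b;

   code hoisting evaluates the very busy expression once in the
   dominating block:

       tmp = a + b;
       if (c)
         x = tmp;
       else
         y = tmp;

   The code gets smaller and no execution path gets longer, which is why
   the pass is gated on optimizing for size (see gate_rtl_hoist below).  */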
\f
/* Here we provide the things required to do store motion towards the exit.
   In order for this to be effective, gcse also needed to be taught how to
   move a load when it is killed only by a store to itself.

            int i;
            float a[10];

            void foo (float scale)
            {
              for (i=0; i<10; i++)
                a[i] *= scale;
            }

   'i' is both loaded and stored to in the loop.  Normally, gcse cannot move
   the load out since it's live around the loop, and stored at the bottom
   of the loop.

   The 'Load Motion' referred to and implemented in this file is
   an enhancement to gcse which, when using edge-based LCM, recognizes
   this situation and allows gcse to move the load out of the loop.

   Once gcse has hoisted the load, store motion can then push this
   load towards the exit, and we end up with no loads or stores of 'i'
   in the loop.  */
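
/* A sketch of the intended result for the example above (illustrative
   only; the passes work on RTL, and the register below is a hypothetical
   pseudo):

       void foo (float scale)
       {
         int reg = i;                   -- load moved out of the loop
         for (reg = 0; reg < 10; reg++)
           a[reg] *= scale;
         i = reg;                       -- store pushed towards the exit
       }

   The loop body itself no longer loads or stores 'i'.  */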

static hashval_t
pre_ldst_expr_hash (const void *p)
{
  int do_not_record_p = 0;
  const struct ls_expr *const x = (const struct ls_expr *) p;
  return
    hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
}

static int
pre_ldst_expr_eq (const void *p1, const void *p2)
{
  const struct ls_expr *const ptr1 = (const struct ls_expr *) p1,
                       *const ptr2 = (const struct ls_expr *) p2;
  return expr_equiv_p (ptr1->pattern, ptr2->pattern);
}

/* This will search the ldst list for a matching expression.  If it
   doesn't find one, we create one and initialize it.  */

static struct ls_expr *
ldst_entry (rtx x)
{
  int do_not_record_p = 0;
  struct ls_expr * ptr;
  unsigned int hash;
  void **slot;
  struct ls_expr e;

  hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
                   NULL, /*have_reg_qty=*/false);

  e.pattern = x;
  slot = htab_find_slot_with_hash (pre_ldst_table, &e, hash, INSERT);
  if (*slot)
    return (struct ls_expr *) *slot;

  ptr = XNEW (struct ls_expr);

  ptr->next         = pre_ldst_mems;
  ptr->expr         = NULL;
  ptr->pattern      = x;
  ptr->pattern_regs = NULL_RTX;
  ptr->loads        = NULL_RTX;
  ptr->stores       = NULL_RTX;
  ptr->reaching_reg = NULL_RTX;
  ptr->invalid      = 0;
  ptr->index        = 0;
  ptr->hash_index   = hash;
  pre_ldst_mems     = ptr;
  *slot = ptr;

  return ptr;
}

/* Free up an individual ldst entry.  */

static void
free_ldst_entry (struct ls_expr * ptr)
{
  free_INSN_LIST_list (& ptr->loads);
  free_INSN_LIST_list (& ptr->stores);

  free (ptr);
}

/* Free up all memory associated with the ldst list.  */

static void
free_ld_motion_mems (void)
{
  if (pre_ldst_table)
    htab_delete (pre_ldst_table);
  pre_ldst_table = NULL;

  while (pre_ldst_mems)
    {
      struct ls_expr * tmp = pre_ldst_mems;

      pre_ldst_mems = pre_ldst_mems->next;

      free_ldst_entry (tmp);
    }

  pre_ldst_mems = NULL;
}

/* Dump debugging info about the ldst list.  */

static void
print_ldst_list (FILE * file)
{
  struct ls_expr * ptr;

  fprintf (file, "LDST list: \n");

  for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
    {
      fprintf (file, "  Pattern (%3d): ", ptr->index);

      print_rtl (file, ptr->pattern);

      fprintf (file, "\n Loads : ");

      if (ptr->loads)
        print_rtl (file, ptr->loads);
      else
        fprintf (file, "(nil)");

      fprintf (file, "\n Stores : ");

      if (ptr->stores)
        print_rtl (file, ptr->stores);
      else
        fprintf (file, "(nil)");

      fprintf (file, "\n\n");
    }

  fprintf (file, "\n");
}

/* Return the entry for X if X is in the list of ldst only expressions,
   or NULL if there is no valid entry.  */

static struct ls_expr *
find_rtx_in_ldst (rtx x)
{
  struct ls_expr e;
  void **slot;
  if (!pre_ldst_table)
    return NULL;
  e.pattern = x;
  slot = htab_find_slot (pre_ldst_table, &e, NO_INSERT);
  if (!slot || ((struct ls_expr *) *slot)->invalid)
    return NULL;
  return (struct ls_expr *) *slot;
}
\f
/* Load Motion for loads which only kill themselves.  */

/* Return true if X, a MEM, is a simple access with no side effects.
   These are the types of loads we consider for the ld_motion list;
   otherwise we let the usual aliasing take care of it.  */

static int
simple_mem (const_rtx x)
{
  if (MEM_VOLATILE_P (x))
    return 0;

  if (GET_MODE (x) == BLKmode)
    return 0;

  /* If we are handling exceptions, we must be careful with memory references
     that may trap.  If we are not, the behavior is undefined, so we may just
     continue.  */
  if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
    return 0;

  if (side_effects_p (x))
    return 0;

  /* Do not consider function arguments passed on the stack.  */
  if (reg_mentioned_p (stack_pointer_rtx, x))
    return 0;

  if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
    return 0;

  return 1;
}
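
/* By way of example (hypothetical RTL, for exposition only): a direct
   access to a symbol such as

       (mem:SI (symbol_ref:SI ("x")))

   is "simple" in the sense above, while a volatile or BLKmode access,
   an access that may trap when -fnon-call-exceptions is in effect, or
   a stack-relative access such as

       (mem:SI (plus:SI (reg:SI sp) (const_int 4)))

   fails one of the checks and is left to the normal aliasing code.  */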

/* Make sure there isn't a buried reference in this pattern anywhere.
   If there is, invalidate the entry for it since we're not capable
   of fixing it up just yet.  We have to be sure we know about ALL
   loads, since the aliasing code will allow all entries in the
   ld_motion list to not alias themselves.  If we miss a load, we will
   get the wrong value, since gcse might common it and we won't know to
   fix it up.  */

static void
invalidate_any_buried_refs (rtx x)
{
  const char * fmt;
  int i, j;
  struct ls_expr * ptr;

  /* Invalidate it in the list.  */
  if (MEM_P (x) && simple_mem (x))
    {
      ptr = ldst_entry (x);
      ptr->invalid = 1;
    }

  /* Recursively process the insn.  */
  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
        invalidate_any_buried_refs (XEXP (x, i));
      else if (fmt[i] == 'E')
        for (j = XVECLEN (x, i) - 1; j >= 0; j--)
          invalidate_any_buried_refs (XVECEXP (x, i, j));
    }
}
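
/* A concrete case (illustrative only): in an insn such as

       (set (reg:SI 100) (plus:SI (mem:SI (symbol_ref:SI ("x")))
                                  (const_int 1)))

   the MEM is buried inside the PLUS rather than being the whole SET_SRC,
   so the recursion above finds it and marks its ldst entry invalid.  */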

/* Find all the 'simple' MEMs which are used in LOADs and STORES.  'Simple'
   means MEM loads and stores to symbols, with no side effects and no
   registers in the expression.  For a MEM destination, we also check
   that the insn is still valid if we replace the destination with a
   REG, as is done in update_ld_motion_stores.  Any uses/defs which
   don't match these criteria are invalidated and trimmed out later.  */

static void
compute_ld_motion_mems (void)
{
  struct ls_expr * ptr;
  basic_block bb;
  rtx insn;

  pre_ldst_mems = NULL;
  pre_ldst_table
    = htab_create (13, pre_ldst_expr_hash, pre_ldst_expr_eq, NULL);

  FOR_EACH_BB (bb)
    {
      FOR_BB_INSNS (bb, insn)
        {
          if (NONDEBUG_INSN_P (insn))
            {
              if (GET_CODE (PATTERN (insn)) == SET)
                {
                  rtx src = SET_SRC (PATTERN (insn));
                  rtx dest = SET_DEST (PATTERN (insn));

                  /* Check for a simple LOAD...  */
                  if (MEM_P (src) && simple_mem (src))
                    {
                      ptr = ldst_entry (src);
                      if (REG_P (dest))
                        ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
                      else
                        ptr->invalid = 1;
                    }
                  else
                    {
                      /* Make sure there isn't a buried load somewhere.  */
                      invalidate_any_buried_refs (src);
                    }

                  /* Check for stores.  Don't worry about aliased ones; they
                     will block any movement we might do later.  We only care
                     about this exact pattern since those are the only
                     circumstances in which we will ignore the aliasing
                     info.  */
                  if (MEM_P (dest) && simple_mem (dest))
                    {
                      ptr = ldst_entry (dest);

                      if (! MEM_P (src)
                          && GET_CODE (src) != ASM_OPERANDS
                          /* Check for REG manually since want_to_gcse_p
                             returns 0 for all REGs.  */
                          && can_assign_to_reg_without_clobbers_p (src))
                        ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
                      else
                        ptr->invalid = 1;
                    }
                }
              else
                invalidate_any_buried_refs (PATTERN (insn));
            }
        }
    }
}
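
/* As an example of what the walk above records (hypothetical RTL):

       (set (reg:SI 100) (mem:SI (symbol_ref:SI ("i"))))

   goes on the loads list of the entry for 'i', while

       (set (mem:SI (symbol_ref:SI ("i"))) (reg:SI 100))

   goes on its stores list; any other appearance of that MEM invalidates
   the entry.  */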

/* Remove any references that have been either invalidated or are not in the
   expression list for pre gcse.  */

static void
trim_ld_motion_mems (void)
{
  struct ls_expr * * last = & pre_ldst_mems;
  struct ls_expr * ptr = pre_ldst_mems;

  while (ptr != NULL)
    {
      struct expr * expr;

      /* Delete if entry has been made invalid.  */
      if (! ptr->invalid)
        {
          /* Delete if we cannot find this mem in the expression list.  */
          unsigned int hash = ptr->hash_index % expr_hash_table.size;

          for (expr = expr_hash_table.table[hash];
               expr != NULL;
               expr = expr->next_same_hash)
            if (expr_equiv_p (expr->expr, ptr->pattern))
              break;
        }
      else
        expr = (struct expr *) 0;

      if (expr)
        {
          /* Set the expression field if we are keeping it.  */
          ptr->expr = expr;
          last = & ptr->next;
          ptr = ptr->next;
        }
      else
        {
          *last = ptr->next;
          htab_remove_elt_with_hash (pre_ldst_table, ptr, ptr->hash_index);
          free_ldst_entry (ptr);
          ptr = * last;
        }
    }

  /* Show the world what we've found.  */
  if (dump_file && pre_ldst_mems != NULL)
    print_ldst_list (dump_file);
}

/* This routine will take an expression which we are replacing with
   a reaching register, and update any stores that are needed if
   that expression is in the ld_motion list.  Stores are updated by
   copying their SRC to the reaching register, and then storing
   the reaching register into the store location.  This keeps the
   correct value in the reaching register for the loads.  */

static void
update_ld_motion_stores (struct expr * expr)
{
  struct ls_expr * mem_ptr;

  if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
    {
      /* We could try to find just the REACHED stores, but it shouldn't
         matter to set the reaching reg everywhere...  some might be
         dead and should be eliminated later.  */

      /* We replace (set mem expr) with (set reg expr) (set mem reg)
         where reg is the reaching reg used in the load.  We checked in
         compute_ld_motion_mems that we can replace (set mem expr) with
         (set reg expr) in that insn.  */
      rtx list = mem_ptr->stores;

      for ( ; list != NULL_RTX; list = XEXP (list, 1))
        {
          rtx insn = XEXP (list, 0);
          rtx pat = PATTERN (insn);
          rtx src = SET_SRC (pat);
          rtx reg = expr->reaching_reg;
          rtx copy;

          /* If we've already copied it, continue.  */
          if (expr->reaching_reg == src)
            continue;

          if (dump_file)
            {
              fprintf (dump_file, "PRE: store updated with reaching reg ");
              print_rtl (dump_file, reg);
              fprintf (dump_file, ":\n ");
              print_inline_rtx (dump_file, insn, 8);
              fprintf (dump_file, "\n");
            }

          copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
          emit_insn_before (copy, insn);
          SET_SRC (pat) = reg;
          df_insn_rescan (insn);

          /* Un-recognize this pattern since it's probably different now.  */
          INSN_CODE (insn) = -1;
          gcse_create_count++;
        }
    }
}
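
/* Worked example (hypothetical RTL): given a recorded store

       (set (mem:SI (symbol_ref:SI ("i"))) (plus:SI (reg:SI 100)
                                                    (const_int 1)))

   and reaching register (reg:SI 200), the loop above first emits

       (set (reg:SI 200) (plus:SI (reg:SI 100) (const_int 1)))

   just before the store and then rewrites the store itself as

       (set (mem:SI (symbol_ref:SI ("i"))) (reg:SI 200))

   so the reaching register always holds the value that memory will
   hold.  */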
\f
/* Return true if the graph is too expensive to optimize.  PASS is the
   optimization about to be performed.  */

static bool
is_too_expensive (const char *pass)
{
  /* Trying to perform global optimizations on flow graphs which have
     a high connectivity will take a long time and is unlikely to be
     particularly useful.

     In normal circumstances a cfg should have about twice as many
     edges as blocks.  But we do not want to punish small functions
     which have a couple of switch statements.  Rather than simply
     threshold the number of blocks, use something with a more
     graceful degradation.  */
  if (n_edges > 20000 + n_basic_blocks * 4)
    {
      warning (OPT_Wdisabled_optimization,
               "%s: %d basic blocks and %d edges/basic block",
               pass, n_basic_blocks, n_edges / n_basic_blocks);

      return true;
    }

  /* If allocating memory for the dataflow bitmaps would take up too much
     storage it's better just to disable the optimization.  */
  if ((n_basic_blocks
       * SBITMAP_SET_SIZE (max_reg_num ())
       * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
    {
      warning (OPT_Wdisabled_optimization,
               "%s: %d basic blocks and %d registers",
               pass, n_basic_blocks, max_reg_num ());

      return true;
    }

  return false;
}
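
/* To make the first threshold concrete (the numbers are illustrative):
   with n_basic_blocks == 5000 the pass is skipped once n_edges exceeds
   20000 + 5000 * 4 == 40000, i.e. once the CFG averages more than 8
   edges per block -- four times the roughly 2 edges per block expected
   of a normal CFG.  */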
\f
/* All the passes implemented in this file.  Each pass has its
   own gate and execute function, and at the end of the file a
   pass definition for passes.c.

   We do not construct an accurate cfg in functions which call
   setjmp, so none of these passes runs if the function calls
   setjmp.
   FIXME: Should just handle setjmp via REG_SETJMP notes.  */

static bool
gate_rtl_pre (void)
{
  return optimize > 0 && flag_gcse
    && !cfun->calls_setjmp
    && optimize_function_for_speed_p (cfun)
    && dbg_cnt (pre);
}

static unsigned int
execute_rtl_pre (void)
{
  int changed;
  delete_unreachable_blocks ();
  df_analyze ();
  changed = one_pre_gcse_pass ();
  flag_rerun_cse_after_global_opts |= changed;
  if (changed)
    cleanup_cfg (0);
  return 0;
}

static bool
gate_rtl_hoist (void)
{
  return optimize > 0 && flag_gcse
    && !cfun->calls_setjmp
    /* It does not make sense to run code hoisting unless we are optimizing
       for code size -- it rarely makes programs faster, and can make them
       bigger if we did PRE (when optimizing for space, we don't run PRE).  */
    && optimize_function_for_size_p (cfun)
    && dbg_cnt (hoist);
}

static unsigned int
execute_rtl_hoist (void)
{
  int changed;
  delete_unreachable_blocks ();
  df_analyze ();
  changed = one_code_hoisting_pass ();
  flag_rerun_cse_after_global_opts |= changed;
  if (changed)
    cleanup_cfg (0);
  return 0;
}

struct rtl_opt_pass pass_rtl_pre =
{
 {
  RTL_PASS,
  "rtl pre",                            /* name */
  gate_rtl_pre,                         /* gate */
  execute_rtl_pre,                      /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_PRE,                               /* tv_id */
  PROP_cfglayout,                       /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_verify_flow | TODO_ggc_collect   /* todo_flags_finish */
 }
};

struct rtl_opt_pass pass_rtl_hoist =
{
 {
  RTL_PASS,
  "hoist",                              /* name */
  gate_rtl_hoist,                       /* gate */
  execute_rtl_hoist,                    /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_HOIST,                             /* tv_id */
  PROP_cfglayout,                       /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_verify_flow | TODO_ggc_collect   /* todo_flags_finish */
 }
};

#include "gt-gcse.h"