]> gcc.gnu.org Git - gcc.git/blame - gcc/fwprop.cc
ada: Another couple of cleanups in the finalization machinery
[gcc.git] / gcc / fwprop.cc
CommitLineData
a52b023a 1/* RTL-based forward propagation pass for GNU compiler.
aeee4812 2 Copyright (C) 2005-2023 Free Software Foundation, Inc.
a52b023a
PB
3 Contributed by Paolo Bonzini and Steven Bosscher.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9dcd6f09 9Software Foundation; either version 3, or (at your option) any later
a52b023a
PB
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
9dcd6f09
NC
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
a52b023a 20
0b76990a
RS
21#define INCLUDE_ALGORITHM
22#define INCLUDE_FUNCTIONAL
a52b023a
PB
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
c7131fb2
AM
26#include "backend.h"
27#include "rtl.h"
1815e313 28#include "rtlanal.h"
c7131fb2 29#include "df.h"
0b76990a 30#include "rtl-ssa.h"
957060b5 31
0b76990a 32#include "predict.h"
60393bbc
AM
33#include "cfgrtl.h"
34#include "cfgcleanup.h"
a52b023a
PB
35#include "cfgloop.h"
36#include "tree-pass.h"
aa4e2d7e 37#include "rtl-iter.h"
0b76990a 38#include "target.h"
a52b023a
PB
39
40/* This pass does simple forward propagation and simplification when an
41 operand of an insn can only come from a single def. This pass uses
0b76990a 42 RTL SSA, so it is global. However, we only do limited analysis of
a52b023a
PB
43 available expressions.
44
45 1) The pass tries to propagate the source of the def into the use,
46 and checks if the result is independent of the substituted value.
47 For example, the high word of a (zero_extend:DI (reg:SI M)) is always
48 zero, independent of the source register.
49
50 In particular, we propagate constants into the use site. Sometimes
51 RTL expansion did not put the constant in the same insn on purpose,
52 to satisfy a predicate, and the result will fail to be recognized;
53 but this happens rarely and in this case we can still create a
54 REG_EQUAL note. For multi-word operations, this
55
56 (set (subreg:SI (reg:DI 120) 0) (const_int 0))
57 (set (subreg:SI (reg:DI 120) 4) (const_int -1))
58 (set (subreg:SI (reg:DI 122) 0)
0b76990a 59 (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
a52b023a 60 (set (subreg:SI (reg:DI 122) 4)
0b76990a 61 (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
a52b023a
PB
62
63 can be simplified to the much simpler
64
65 (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119)))
66 (set (subreg:SI (reg:DI 122) 4) (const_int -1))
67
68 This particular propagation is also effective at putting together
69 complex addressing modes. We are more aggressive inside MEMs, in
70 that all definitions are propagated if the use is in a MEM; if the
71 result is a valid memory address we check address_cost to decide
72 whether the substitution is worthwhile.
73
74 2) The pass propagates register copies. This is not as effective as
75 the copy propagation done by CSE's canon_reg, which works by walking
76 the instruction chain, it can help the other transformations.
77
78 We should consider removing this optimization, and instead reorder the
79 RTL passes, because GCSE does this transformation too. With some luck,
80 the CSE pass at the end of rest_of_handle_gcse could also go away.
81
82 3) The pass looks for paradoxical subregs that are actually unnecessary.
83 Things like this:
84
85 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
86 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
87 (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
0b76990a 88 (subreg:SI (reg:QI 121) 0)))
a52b023a
PB
89
90 are very common on machines that can only do word-sized operations.
91 For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
92 if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
93 we can replace the paradoxical subreg with simply (reg:WIDE M). The
94 above will simplify this to
95
96 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
97 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
98 (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
99
0b76990a 100 where the first two insns are now dead. */
a52b023a 101
0b76990a 102using namespace rtl_ssa;
a52b023a 103
a52b023a
PB
104static int num_changes;
105
a52b023a
PB
106/* Do not try to replace constant addresses or addresses of local and
107 argument slots. These MEM expressions are made only once and inserted
108 in many instructions, as well as being used to control symbol table
109 output. It is not safe to clobber them.
110
111 There are some uncommon cases where the address is already in a register
112 for some reason, but we cannot take advantage of that because we have
113 no easy way to unshare the MEM. In addition, looking up all stack
114 addresses is costly. */
115
116static bool
117can_simplify_addr (rtx addr)
118{
119 rtx reg;
120
121 if (CONSTANT_ADDRESS_P (addr))
122 return false;
123
124 if (GET_CODE (addr) == PLUS)
125 reg = XEXP (addr, 0);
126 else
127 reg = addr;
128
129 return (!REG_P (reg)
130 || (REGNO (reg) != FRAME_POINTER_REGNUM
131 && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
132 && REGNO (reg) != ARG_POINTER_REGNUM));
133}
134
0b76990a
RS
135/* MEM is the result of an address simplification, and temporarily
136 undoing changes OLD_NUM_CHANGES onwards restores the original address.
137 Return whether it is good to use the new address instead of the
138 old one. INSN is the containing instruction. */
a52b023a
PB
139
140static bool
0b76990a 141should_replace_address (int old_num_changes, rtx mem, rtx_insn *insn)
a52b023a
PB
142{
143 int gain;
144
a52b023a 145 /* Prefer the new address if it is less expensive. */
0b76990a
RS
146 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
147 temporarily_undo_changes (old_num_changes);
148 gain = address_cost (XEXP (mem, 0), GET_MODE (mem),
149 MEM_ADDR_SPACE (mem), speed);
150 redo_changes (old_num_changes);
151 gain -= address_cost (XEXP (mem, 0), GET_MODE (mem),
152 MEM_ADDR_SPACE (mem), speed);
a52b023a
PB
153
154 /* If the addresses have equivalent cost, prefer the new address
5e8f01f4 155 if it has the highest `set_src_cost'. That has the potential of
a52b023a 156 eliminating the most insns without additional costs, and it
e53b6e56 157 is the same that cse.cc used to do. */
a52b023a 158 if (gain == 0)
0b76990a
RS
159 {
160 gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed);
161 temporarily_undo_changes (old_num_changes);
162 gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed);
163 redo_changes (old_num_changes);
164 }
a52b023a
PB
165
166 return (gain > 0);
167}
168
460d667d 169
0b76990a
RS
170namespace
171{
172 class fwprop_propagation : public insn_propagation
173 {
174 public:
175 static const uint16_t CHANGED_MEM = FIRST_SPARE_RESULT;
176 static const uint16_t CONSTANT = FIRST_SPARE_RESULT << 1;
177 static const uint16_t PROFITABLE = FIRST_SPARE_RESULT << 2;
460d667d 178
b61461ac 179 fwprop_propagation (insn_info *, set_info *, rtx, rtx);
460d667d 180
0b76990a
RS
181 bool changed_mem_p () const { return result_flags & CHANGED_MEM; }
182 bool folded_to_constants_p () const;
183 bool profitable_p () const;
f40751dd 184
0b76990a
RS
185 bool check_mem (int, rtx) final override;
186 void note_simplification (int, uint16_t, rtx, rtx) final override;
187 uint16_t classify_result (rtx, rtx);
efb6bc55
IL
188
189 private:
190 const bool single_use_p;
191 const bool single_ebb_p;
0b76990a
RS
192 };
193}
460d667d 194
b61461ac 195/* Prepare to replace FROM with TO in USE_INSN. */
75da268e 196
efb6bc55 197fwprop_propagation::fwprop_propagation (insn_info *use_insn,
b61461ac 198 set_info *def, rtx from, rtx to)
efb6bc55 199 : insn_propagation (use_insn->rtl (), from, to),
b61461ac
IL
200 single_use_p (def->single_nondebug_use ()),
201 single_ebb_p (use_insn->ebb () == def->ebb ())
75da268e 202{
0b76990a
RS
203 should_check_mems = true;
204 should_note_simplifications = true;
75da268e 205}
460d667d 206
0b76990a
RS
207/* MEM is the result of an address simplification, and temporarily
208 undoing changes OLD_NUM_CHANGES onwards restores the original address.
209 Return true if the propagation should continue, false if it has failed. */
a52b023a 210
0b76990a
RS
211bool
212fwprop_propagation::check_mem (int old_num_changes, rtx mem)
a52b023a 213{
0b76990a
RS
214 if (!memory_address_addr_space_p (GET_MODE (mem), XEXP (mem, 0),
215 MEM_ADDR_SPACE (mem)))
460d667d 216 {
0b76990a 217 failure_reason = "would create an invalid MEM";
460d667d
PB
218 return false;
219 }
a52b023a 220
0b76990a
RS
221 temporarily_undo_changes (old_num_changes);
222 bool can_simplify = can_simplify_addr (XEXP (mem, 0));
223 redo_changes (old_num_changes);
224 if (!can_simplify)
a52b023a 225 {
0b76990a
RS
226 failure_reason = "would replace a frame address";
227 return false;
a52b023a
PB
228 }
229
0b76990a
RS
230 /* Copy propagations are always ok. Otherwise check the costs. */
231 if (!(REG_P (from) && REG_P (to))
232 && !should_replace_address (old_num_changes, mem, insn))
a52b023a 233 {
0b76990a
RS
234 failure_reason = "would increase the cost of a MEM";
235 return false;
236 }
a52b023a 237
0b76990a
RS
238 result_flags |= CHANGED_MEM;
239 return true;
240}
a52b023a 241
0b76990a
RS
242/* OLDX has been simplified to NEWX. Describe the change in terms of
243 result_flags. */
a52b023a 244
0b76990a
RS
245uint16_t
246fwprop_propagation::classify_result (rtx old_rtx, rtx new_rtx)
247{
248 if (CONSTANT_P (new_rtx))
249 {
250 /* If OLD_RTX is a LO_SUM, then it presumably exists for a reason,
251 and NEW_RTX is likely not a legitimate address. We want it to
252 disappear if it is invalid.
253
254 ??? Using the mode of the LO_SUM as the mode of the address
255 seems odd, but it was what the pre-SSA code did. */
256 if (GET_CODE (old_rtx) == LO_SUM
257 && !memory_address_p (GET_MODE (old_rtx), new_rtx))
258 return CONSTANT;
259 return CONSTANT | PROFITABLE;
a52b023a
PB
260 }
261
712a93d6
RB
262 /* Allow replacements that simplify operations on a vector or complex
263 value to a component. The most prominent case is
264 (subreg ([vec_]concat ...)). */
0b76990a
RS
265 if (REG_P (new_rtx)
266 && !HARD_REGISTER_P (new_rtx)
267 && (VECTOR_MODE_P (GET_MODE (from))
268 || COMPLEX_MODE_P (GET_MODE (from)))
269 && GET_MODE (new_rtx) == GET_MODE_INNER (GET_MODE (from)))
270 return PROFITABLE;
712a93d6 271
efb6bc55
IL
272 /* Allow (subreg (mem)) -> (mem) simplifications with the following
273 exceptions:
274 1) Propagating (mem)s into multiple uses is not profitable.
275 2) Propagating (mem)s across EBBs may not be profitable if the source EBB
276 runs less frequently.
277 3) Propagating (mem)s into paradoxical (subreg)s is not profitable.
278 4) Creating new (mem/v)s is not correct, since DCE will not remove the old
279 ones. */
280 if (single_use_p
281 && single_ebb_p
282 && SUBREG_P (old_rtx)
283 && !paradoxical_subreg_p (old_rtx)
284 && MEM_P (new_rtx)
285 && !MEM_VOLATILE_P (new_rtx))
286 return PROFITABLE;
287
0b76990a 288 return 0;
a52b023a
PB
289}
290
0b76990a
RS
291/* Record that OLD_RTX has been simplified to NEW_RTX. OLD_NUM_CHANGES
292 is the number of unrelated changes that had been made before processing
293 OLD_RTX and its subrtxes. OLD_RESULT_FLAGS is the value that result_flags
294 had at that point. */
460d667d 295
0b76990a
RS
296void
297fwprop_propagation::note_simplification (int old_num_changes,
298 uint16_t old_result_flags,
299 rtx old_rtx, rtx new_rtx)
300{
301 result_flags &= ~(CONSTANT | PROFITABLE);
302 uint16_t new_flags = classify_result (old_rtx, new_rtx);
303 if (old_num_changes)
304 new_flags &= old_result_flags;
305 result_flags |= new_flags;
306}
460d667d 307
0b76990a
RS
308/* Return true if all substitutions eventually folded to constants. */
309
310bool
311fwprop_propagation::folded_to_constants_p () const
460d667d 312{
0b76990a
RS
313 /* If we're propagating a HIGH, require it to be folded with a
314 partnering LO_SUM. For example, a REG_EQUAL note with a register
315 replaced by an unfolded HIGH is not useful. */
316 if (CONSTANT_P (to) && GET_CODE (to) != HIGH)
317 return true;
318 return !(result_flags & UNSIMPLIFIED) && (result_flags & CONSTANT);
460d667d
PB
319}
320
321
0b76990a
RS
322/* Return true if it is worth keeping the result of the propagation,
323 false if it would increase the complexity of the pattern too much. */
a52b023a 324
0b76990a
RS
325bool
326fwprop_propagation::profitable_p () const
a52b023a 327{
0b76990a
RS
328 if (changed_mem_p ())
329 return true;
a52b023a 330
0b76990a
RS
331 if (!(result_flags & UNSIMPLIFIED)
332 && (result_flags & PROFITABLE))
333 return true;
a52b023a 334
0b76990a
RS
335 if (REG_P (to))
336 return true;
a52b023a 337
0b76990a
RS
338 if (GET_CODE (to) == SUBREG
339 && REG_P (SUBREG_REG (to))
340 && !paradoxical_subreg_p (to))
341 return true;
a52b023a 342
0b76990a
RS
343 if (CONSTANT_P (to))
344 return true;
a52b023a 345
0b76990a
RS
346 return false;
347}
a52b023a 348
0b76990a 349/* Check that X has a single def. */
a52b023a 350
6fb5fa3c 351static bool
0b76990a 352reg_single_def_p (rtx x)
a52b023a 353{
0b76990a
RS
354 return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x));
355}
a52b023a 356
b61461ac
IL
357/* Try to substitute (set DEST SRC), which defines DEF, into note NOTE of
358 USE_INSN. Return the number of substitutions on success, otherwise return
359 -1 and leave USE_INSN unchanged.
a52b023a 360
b61461ac 361 If REQUIRE_CONSTANT is true, require all substituted occurrences of SRC
0b76990a
RS
362 to fold to a constant, so that the note does not use any more registers
363 than it did previously. If REQUIRE_CONSTANT is false, also allow the
364 substitution if it's something we'd normally allow for the main
365 instruction pattern. */
692e7e54 366
0b76990a 367static int
b61461ac 368try_fwprop_subst_note (insn_info *use_insn, set_info *def,
0b76990a 369 rtx note, rtx dest, rtx src, bool require_constant)
a52b023a 370{
0b76990a 371 rtx_insn *use_rtl = use_insn->rtl ();
b61461ac 372 insn_info *def_insn = def->insn ();
a52b023a 373
0b76990a 374 insn_change_watermark watermark;
b61461ac 375 fwprop_propagation prop (use_insn, def, dest, src);
0b76990a 376 if (!prop.apply_to_rvalue (&XEXP (note, 0)))
a52b023a 377 {
0b76990a
RS
378 if (dump_file && (dump_flags & TDF_DETAILS))
379 fprintf (dump_file, "cannot propagate from insn %d into"
380 " notes of insn %d: %s\n", def_insn->uid (),
381 use_insn->uid (), prop.failure_reason);
382 return -1;
a52b023a
PB
383 }
384
0b76990a
RS
385 if (prop.num_replacements == 0)
386 return 0;
a52b023a 387
0b76990a 388 if (require_constant)
a52b023a 389 {
0b76990a
RS
390 if (!prop.folded_to_constants_p ())
391 {
392 if (dump_file && (dump_flags & TDF_DETAILS))
393 fprintf (dump_file, "cannot propagate from insn %d into"
394 " notes of insn %d: %s\n", def_insn->uid (),
395 use_insn->uid (), "wouldn't fold to constants");
396 return -1;
397 }
a52b023a
PB
398 }
399 else
400 {
0b76990a 401 if (!prop.folded_to_constants_p () && !prop.profitable_p ())
6fb5fa3c 402 {
0b76990a
RS
403 if (dump_file && (dump_flags & TDF_DETAILS))
404 fprintf (dump_file, "cannot propagate from insn %d into"
405 " notes of insn %d: %s\n", def_insn->uid (),
406 use_insn->uid (), "would increase complexity of node");
407 return -1;
6fb5fa3c 408 }
a52b023a
PB
409 }
410
0b76990a
RS
411 if (dump_file && (dump_flags & TDF_DETAILS))
412 {
413 fprintf (dump_file, "\nin notes of insn %d, replacing:\n ",
414 INSN_UID (use_rtl));
415 temporarily_undo_changes (0);
416 print_inline_rtx (dump_file, note, 2);
417 redo_changes (0);
418 fprintf (dump_file, "\n with:\n ");
419 print_inline_rtx (dump_file, note, 2);
420 fprintf (dump_file, "\n");
421 }
422 watermark.keep ();
423 return prop.num_replacements;
a52b023a
PB
424}
425
b61461ac 426/* Try to substitute (set DEST SRC), which defines DEF, into location LOC of
0b76990a
RS
427 USE_INSN's pattern. Return true on success, otherwise leave USE_INSN
428 unchanged. */
a52b023a 429
0b76990a
RS
430static bool
431try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change,
b61461ac 432 set_info *def, rtx *loc, rtx dest, rtx src)
a52b023a 433{
0b76990a
RS
434 insn_info *use_insn = use_change.insn ();
435 rtx_insn *use_rtl = use_insn->rtl ();
b61461ac 436 insn_info *def_insn = def->insn ();
a52b023a 437
0b76990a 438 insn_change_watermark watermark;
b61461ac 439 fwprop_propagation prop (use_insn, def, dest, src);
0b76990a
RS
440 if (!prop.apply_to_pattern (loc))
441 {
442 if (dump_file && (dump_flags & TDF_DETAILS))
443 fprintf (dump_file, "cannot propagate from insn %d into"
444 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
445 prop.failure_reason);
446 return false;
dc007c1f 447 }
a52b023a 448
0b76990a
RS
449 if (prop.num_replacements == 0)
450 return false;
a52b023a 451
0b76990a
RS
452 if (!prop.profitable_p ())
453 {
454 if (dump_file && (dump_flags & TDF_DETAILS))
455 fprintf (dump_file, "cannot propagate from insn %d into"
456 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
457 "would increase complexity of pattern");
458 return false;
459 }
a52b023a 460
0b76990a
RS
461 if (dump_file && (dump_flags & TDF_DETAILS))
462 {
463 fprintf (dump_file, "\npropagating insn %d into insn %d, replacing:\n",
464 def_insn->uid (), use_insn->uid ());
465 temporarily_undo_changes (0);
466 print_rtl_single (dump_file, PATTERN (use_rtl));
467 redo_changes (0);
468 }
a52b023a 469
0b76990a
RS
470 /* ??? In theory, it should be better to use insn costs rather than
471 set_src_costs here. That would involve replacing this code with
472 change_is_worthwhile. */
473 bool ok = recog (attempt, use_change);
474 if (ok && !prop.changed_mem_p () && !use_insn->is_asm ())
475 if (rtx use_set = single_set (use_rtl))
476 {
477 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_rtl));
478 temporarily_undo_changes (0);
479 auto old_cost = set_src_cost (SET_SRC (use_set),
480 GET_MODE (SET_DEST (use_set)), speed);
481 redo_changes (0);
482 auto new_cost = set_src_cost (SET_SRC (use_set),
483 GET_MODE (SET_DEST (use_set)), speed);
484 if (new_cost > old_cost)
485 {
486 if (dump_file)
487 fprintf (dump_file, "change not profitable"
488 " (cost %d -> cost %d)\n", old_cost, new_cost);
489 ok = false;
490 }
491 }
a52b023a 492
0b76990a 493 if (!ok)
a52b023a 494 {
0b76990a
RS
495 /* The pattern didn't match, but if all uses of SRC folded to
496 constants, we can add a REG_EQUAL note for the result, if there
497 isn't one already. */
498 if (!prop.folded_to_constants_p ())
499 return false;
a52b023a 500
0b76990a
RS
501 /* Test this first to avoid creating an unnecessary copy of SRC. */
502 if (find_reg_note (use_rtl, REG_EQUAL, NULL_RTX))
503 return false;
a52b023a 504
0b76990a
RS
505 rtx set = set_for_reg_notes (use_rtl);
506 if (!set || !REG_P (SET_DEST (set)))
507 return false;
a52b023a 508
0b76990a
RS
509 rtx value = copy_rtx (SET_SRC (set));
510 cancel_changes (0);
dc007c1f 511
0b76990a
RS
512 /* If there are any paradoxical SUBREGs, drop the REG_EQUAL note,
513 because the bits in there can be anything and so might not
514 match the REG_EQUAL note content. See PR70574. */
515 if (contains_paradoxical_subreg_p (SET_SRC (set)))
516 return false;
dc007c1f 517
0b76990a
RS
518 if (dump_file && (dump_flags & TDF_DETAILS))
519 fprintf (dump_file, " Setting REG_EQUAL note\n");
dc007c1f 520
0b76990a 521 return set_unique_reg_note (use_rtl, REG_EQUAL, value);
dc007c1f 522 }
0b76990a
RS
523
524 rtx *note_ptr = &REG_NOTES (use_rtl);
525 while (rtx note = *note_ptr)
dc007c1f 526 {
0b76990a
RS
527 if ((REG_NOTE_KIND (note) == REG_EQUAL
528 || REG_NOTE_KIND (note) == REG_EQUIV)
b61461ac 529 && try_fwprop_subst_note (use_insn, def, note, dest, src, false) < 0)
0b76990a
RS
530 {
531 *note_ptr = XEXP (note, 1);
532 free_EXPR_LIST_node (note);
533 }
534 else
535 note_ptr = &XEXP (note, 1);
a52b023a 536 }
dc007c1f 537
0b76990a
RS
538 confirm_change_group ();
539 crtl->ssa->change_insn (use_change);
540 num_changes++;
541 return true;
a52b023a
PB
542}
543
b61461ac 544/* Try to substitute (set DEST SRC), which defines DEF, into USE_INSN's notes,
0b76990a
RS
545 given that it was not possible to do this for USE_INSN's main pattern.
546 Return true on success, otherwise leave USE_INSN unchanged. */
a52b023a
PB
547
548static bool
b61461ac 549try_fwprop_subst_notes (insn_info *use_insn, set_info *def,
0b76990a
RS
550 rtx dest, rtx src)
551{
552 rtx_insn *use_rtl = use_insn->rtl ();
553 for (rtx note = REG_NOTES (use_rtl); note; note = XEXP (note, 1))
554 if ((REG_NOTE_KIND (note) == REG_EQUAL
555 || REG_NOTE_KIND (note) == REG_EQUIV)
b61461ac 556 && try_fwprop_subst_note (use_insn, def, note, dest, src, true) > 0)
0b76990a
RS
557 {
558 confirm_change_group ();
559 return true;
560 }
a52b023a 561
0b76990a
RS
562 return false;
563}
dc007c1f 564
b61461ac 565/* Check whether we could validly substitute (set DEST SRC), which defines DEF,
0b76990a
RS
566 into USE. If so, first try performing the substitution in location LOC
567 of USE->insn ()'s pattern. If that fails, try instead to substitute
568 into the notes.
a52b023a 569
0b76990a 570 Return true on success, otherwise leave USE_INSN unchanged. */
de266950 571
0b76990a 572static bool
b61461ac 573try_fwprop_subst (use_info *use, set_info *def,
0b76990a
RS
574 rtx *loc, rtx dest, rtx src)
575{
576 insn_info *use_insn = use->insn ();
b61461ac 577 insn_info *def_insn = def->insn ();
de266950 578
0b76990a
RS
579 auto attempt = crtl->ssa->new_change_attempt ();
580 use_array src_uses = remove_note_accesses (attempt, def_insn->uses ());
581
582 /* ??? Not really a meaningful test: it means we can propagate arithmetic
583 involving hard registers but not bare references to them. A better
584 test would be to iterate over src_uses looking for hard registers
585 that are not fixed. */
586 if (REG_P (src) && HARD_REGISTER_P (src))
587 return false;
de266950 588
0b76990a
RS
589 /* ??? It would be better to make this EBB-based instead. That would
590 involve checking for equal EBBs rather than equal BBs and trying
591 to make the uses available at use_insn->ebb ()->first_bb (). */
592 if (def_insn->bb () != use_insn->bb ())
de266950 593 {
0b76990a 594 src_uses = crtl->ssa->make_uses_available (attempt, src_uses,
c97351c0
RS
595 use_insn->bb (),
596 use_insn->is_debug_insn ());
0b76990a
RS
597 if (!src_uses.is_valid ())
598 return false;
a52b023a 599 }
a52b023a 600
0b76990a
RS
601 insn_change use_change (use_insn);
602 use_change.new_uses = merge_access_arrays (attempt, use_change.new_uses,
603 src_uses);
604 if (!use_change.new_uses.is_valid ())
605 return false;
1a13c0a2 606
0b76990a 607 /* ??? We could allow movement within the EBB by adding:
de266950 608
0b76990a
RS
609 use_change.move_range = use_insn->ebb ()->insn_range (); */
610 if (!restrict_movement (use_change))
611 return false;
dc007c1f 612
b61461ac
IL
613 return (try_fwprop_subst_pattern (attempt, use_change, def, loc, dest, src)
614 || try_fwprop_subst_notes (use_insn, def, dest, src));
a52b023a
PB
615}
616
e85122be
AM
617/* For the given single_set INSN, containing SRC known to be a
618 ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
619 is redundant due to the register being set by a LOAD_EXTEND_OP
620 load from memory. */
621
622static bool
0b76990a 623free_load_extend (rtx src, insn_info *insn)
e25b7843 624{
0b76990a 625 rtx reg = XEXP (src, 0);
3712c7a3 626 if (load_extend_op (GET_MODE (reg)) != GET_CODE (src))
e85122be 627 return false;
e25b7843 628
0b76990a
RS
629 def_info *def = nullptr;
630 for (use_info *use : insn->uses ())
631 if (use->regno () == REGNO (reg))
632 {
633 def = use->def ();
634 break;
635 }
e85122be 636
e85122be
AM
637 if (!def)
638 return false;
639
0b76990a
RS
640 insn_info *def_insn = def->insn ();
641 if (def_insn->is_artificial ())
e85122be
AM
642 return false;
643
0b76990a
RS
644 rtx_insn *def_rtl = def_insn->rtl ();
645 if (NONJUMP_INSN_P (def_rtl))
e85122be 646 {
0b76990a 647 rtx patt = PATTERN (def_rtl);
e85122be
AM
648
649 if (GET_CODE (patt) == SET
650 && GET_CODE (SET_SRC (patt)) == MEM
651 && rtx_equal_p (SET_DEST (patt), reg))
652 return true;
e25b7843 653 }
e85122be 654 return false;
e25b7843 655}
e25b7843 656
0b76990a
RS
657/* Subroutine of forward_propagate_subreg that handles a use of DEST
658 in REF. The other parameters are the same. */
a52b023a
PB
659
660static bool
b61461ac 661forward_propagate_subreg (use_info *use, set_info *def,
0b76990a 662 rtx dest, rtx src, df_ref ref)
a52b023a 663{
6b9c3dec 664 scalar_int_mode int_use_mode, src_mode;
a52b023a 665
e25b7843 666 /* Only consider subregs... */
0b76990a 667 rtx use_reg = DF_REF_REG (ref);
ef4bddc2 668 machine_mode use_mode = GET_MODE (use_reg);
a52b023a 669 if (GET_CODE (use_reg) != SUBREG
0b76990a 670 || GET_MODE (SUBREG_REG (use_reg)) != GET_MODE (dest))
a52b023a
PB
671 return false;
672
0b76990a
RS
673 /* ??? Replacing throughout the pattern would help for match_dups. */
674 rtx *loc = DF_REF_LOC (ref);
03a95621 675 if (paradoxical_subreg_p (use_reg))
e25b7843
AM
676 {
677 /* If this is a paradoxical SUBREG, we have no idea what value the
678 extra bits would have. However, if the operand is equivalent to
679 a SUBREG whose operand is the same as our mode, and all the modes
680 are within a word, we can just use the inner operand because
681 these SUBREGs just say how to treat the register. */
e25b7843
AM
682 if (GET_CODE (src) == SUBREG
683 && REG_P (SUBREG_REG (src))
509a31f8 684 && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
e25b7843 685 && GET_MODE (SUBREG_REG (src)) == use_mode
0b76990a 686 && subreg_lowpart_p (src))
b61461ac 687 return try_fwprop_subst (use, def, loc, use_reg, SUBREG_REG (src));
e25b7843
AM
688 }
689
690 /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
691 is the low part of the reg being extended then just use the inner
692 operand. Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
7a613929
RS
693 be removed due to it matching a LOAD_EXTEND_OP load from memory,
694 or due to the operation being a no-op when applied to registers.
695 For example, if we have:
696
697 A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
698 B: (... (subreg:SI (reg:DI X)) ...)
699
700 and mode_rep_extended says that Y is already sign-extended,
701 the backend will typically allow A to be combined with the
702 definition of Y or, failing that, allow A to be deleted after
703 reload through register tying. Introducing more uses of Y
704 prevents both optimisations. */
6b9c3dec
RS
705 else if (is_a <scalar_int_mode> (use_mode, &int_use_mode)
706 && subreg_lowpart_p (use_reg))
e25b7843 707 {
e25b7843
AM
708 if ((GET_CODE (src) == ZERO_EXTEND
709 || GET_CODE (src) == SIGN_EXTEND)
6b9c3dec 710 && is_a <scalar_int_mode> (GET_MODE (src), &src_mode)
e25b7843 711 && REG_P (XEXP (src, 0))
509a31f8 712 && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
e25b7843 713 && GET_MODE (XEXP (src, 0)) == use_mode
b61461ac 714 && !free_load_extend (src, def->insn ())
6b9c3dec 715 && (targetm.mode_rep_extended (int_use_mode, src_mode)
0b76990a 716 != (int) GET_CODE (src)))
b61461ac 717 return try_fwprop_subst (use, def, loc, use_reg, XEXP (src, 0));
e25b7843
AM
718 }
719
720 return false;
a52b023a
PB
721}
722
b61461ac 723/* Try to substitute (set DEST SRC), which defines DEF, into USE and simplify
0b76990a
RS
724 the result, handling cases where DEST is used in a subreg and where
725 applying that subreg to SRC results in a useful simplification. */
ce372372
JJ
726
727static bool
b61461ac 728forward_propagate_subreg (use_info *use, set_info *def, rtx dest, rtx src)
ce372372 729{
0b76990a
RS
730 if (!use->includes_subregs () || !REG_P (dest))
731 return false;
ce372372 732
0b76990a
RS
733 if (GET_CODE (src) != SUBREG
734 && GET_CODE (src) != ZERO_EXTEND
735 && GET_CODE (src) != SIGN_EXTEND)
ce372372
JJ
736 return false;
737
0b76990a
RS
738 rtx_insn *use_rtl = use->insn ()->rtl ();
739 df_ref ref;
ce372372 740
0b76990a
RS
741 FOR_EACH_INSN_USE (ref, use_rtl)
742 if (DF_REF_REGNO (ref) == use->regno ()
b61461ac 743 && forward_propagate_subreg (use, def, dest, src, ref))
0b76990a 744 return true;
ce372372 745
0b76990a
RS
746 FOR_EACH_INSN_EQ_USE (ref, use_rtl)
747 if (DF_REF_REGNO (ref) == use->regno ()
b61461ac 748 && forward_propagate_subreg (use, def, dest, src, ref))
0b76990a 749 return true;
ce372372 750
0b76990a 751 return false;
ce372372
JJ
752}
753
b61461ac 754/* Try to substitute (set DEST SRC), which defines DEF, into USE and
0b76990a 755 simplify the result. */
a52b023a
PB
756
757static bool
b61461ac 758forward_propagate_and_simplify (use_info *use, set_info *def,
0b76990a
RS
759 rtx dest, rtx src)
760{
761 insn_info *use_insn = use->insn ();
762 rtx_insn *use_rtl = use_insn->rtl ();
b61461ac 763 insn_info *def_insn = def->insn ();
0b76990a
RS
764
765 /* ??? This check seems unnecessary. We should be able to propagate
766 into any kind of instruction, regardless of whether it's a single set.
767 It seems odd to be more permissive with asms than normal instructions. */
768 bool need_single_set = (!use_insn->is_asm () && !use_insn->is_debug_insn ());
769 rtx use_set = single_set (use_rtl);
770 if (need_single_set && !use_set)
a52b023a
PB
771 return false;
772
bd1cd0d0 773 /* Do not propagate into PC etc.
a52b023a 774
0b76990a
RS
775 ??? This too seems unnecessary. The current code should work correctly
776 without it, including cases where jumps become unconditional. */
777 if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
8e8af9b7
RS
778 return false;
779
0b76990a
RS
780 /* In __asm don't replace if src might need more registers than
781 reg, as that could increase register pressure on the __asm. */
782 if (use_insn->is_asm () && def_insn->uses ().size () > 1)
a52b023a
PB
783 return false;
784
785 /* Check if the def is loading something from the constant pool; in this
786 case we would undo optimization such as compress_float_constant.
787 Still, we can set a REG_EQUAL note. */
788 if (MEM_P (src) && MEM_READONLY_P (src))
789 {
790 rtx x = avoid_constant_pool_reference (src);
0b76990a
RS
791 rtx note_set;
792 if (x != src
793 && (note_set = set_for_reg_notes (use_rtl))
794 && REG_P (SET_DEST (note_set))
795 && !contains_paradoxical_subreg_p (SET_SRC (note_set)))
a52b023a 796 {
0b76990a
RS
797 rtx note = find_reg_note (use_rtl, REG_EQUAL, NULL_RTX);
798 rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (note_set);
60564289
KG
799 rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
800 if (old_rtx != new_rtx)
0b76990a 801 set_unique_reg_note (use_rtl, REG_EQUAL, copy_rtx (new_rtx));
a52b023a
PB
802 }
803 return false;
804 }
805
0b76990a
RS
806 /* ??? Unconditionally propagating into PATTERN would work better
807 for instructions that have match_dups. */
808 rtx *loc = need_single_set ? &use_set : &PATTERN (use_rtl);
b61461ac 809 return try_fwprop_subst (use, def, loc, dest, src);
a52b023a
PB
810}
811
a52b023a 812/* Given a use USE of an insn, if it has a single reaching
7360d2ac 813 definition, try to forward propagate it into that insn.
0b76990a
RS
814 Return true if something changed.
815
75da268e 816 REG_PROP_ONLY is true if we should only propagate register copies. */
a52b023a 817
7360d2ac 818static bool
0b76990a 819forward_propagate_into (use_info *use, bool reg_prop_only = false)
a52b023a 820{
0b76990a 821 if (use->includes_read_writes ())
7360d2ac 822 return false;
a52b023a 823
0b76990a 824 /* Disregard uninitialized uses. */
b61461ac 825 set_info *def = use->def ();
00952e97 826 if (!def)
7360d2ac 827 return false;
a52b023a 828
0b76990a
RS
829 /* Only consider single-register definitions. This could be relaxed,
830 but it should rarely be needed before RA. */
831 def = look_through_degenerate_phi (def);
832 if (def->includes_multiregs ())
833 return false;
a52b023a 834
0b76990a
RS
835 /* Only consider uses whose definition comes from a real instruction. */
836 insn_info *def_insn = def->insn ();
837 if (def_insn->is_artificial ())
7360d2ac 838 return false;
a52b023a 839
0b76990a
RS
840 rtx_insn *def_rtl = def_insn->rtl ();
841 if (!NONJUMP_INSN_P (def_rtl))
842 return false;
843 /* ??? This seems an unnecessary restriction. We can easily tell
844 which set the definition comes from. */
845 if (multiple_sets (def_rtl))
7360d2ac 846 return false;
0b76990a 847 rtx def_set = simple_regno_set (PATTERN (def_rtl), def->regno ());
a52b023a 848 if (!def_set)
7360d2ac 849 return false;
a52b023a 850
0b76990a
RS
851 rtx dest = SET_DEST (def_set);
852 rtx src = SET_SRC (def_set);
75da268e
PK
853
854 /* Allow propagations into a loop only for reg-to-reg copies, since
e7a7dbb5 855 replacing one register by another shouldn't increase the cost.
856 Propagations from inner loop to outer loop should also be ok. */
0b76990a
RS
857 struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
858 struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
e7a7dbb5 859 if ((reg_prop_only
860 || (def_loop != use_loop
861 && !flow_loop_nested_p (use_loop, def_loop)))
0b76990a
RS
862 && (!reg_single_def_p (dest) || !reg_single_def_p (src)))
863 return false;
75da268e 864
0b76990a
RS
865 /* Don't substitute into a non-local goto, this confuses CFG. */
866 insn_info *use_insn = use->insn ();
867 rtx_insn *use_rtl = use_insn->rtl ();
868 if (JUMP_P (use_rtl)
869 && find_reg_note (use_rtl, REG_NON_LOCAL_GOTO, NULL_RTX))
75da268e
PK
870 return false;
871
b61461ac
IL
872 if (forward_propagate_and_simplify (use, def, dest, src)
873 || forward_propagate_subreg (use, def, dest, src))
0b76990a 874 return true;
a5a9046d 875
7360d2ac 876 return false;
a52b023a 877}
a52b023a
PB
878\f
879static void
880fwprop_init (void)
881{
882 num_changes = 0;
6e0b633f 883 calculate_dominance_info (CDI_DOMINATORS);
a52b023a
PB
884
885 /* We do not always want to propagate into loops, so we have to find
c5f4be84
SB
886 loops and be careful about them. Avoid CFG modifications so that
887 we don't have to update dominance information afterwards for
888 build_single_def_use_links. */
889 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
a52b023a 890
0b76990a
RS
891 df_analyze ();
892 crtl->ssa = new rtl_ssa::function_info (cfun);
a52b023a
PB
893}
894
895static void
896fwprop_done (void)
897{
fb406162 898 loop_optimizer_finalize ();
6fb5fa3c 899
0b76990a 900 crtl->ssa->perform_pending_updates ();
6e0b633f 901 free_dominance_info (CDI_DOMINATORS);
a52b023a 902 cleanup_cfg (0);
0b76990a
RS
903
904 delete crtl->ssa;
905 crtl->ssa = nullptr;
906
a52b023a
PB
907 delete_trivially_dead_insns (get_insns (), max_reg_num ());
908
909 if (dump_file)
910 fprintf (dump_file,
911 "\nNumber of successful forward propagations: %d\n\n",
912 num_changes);
913}
914
0b76990a
RS
915/* Try to optimize INSN, returning true if something changes.
916 FWPROP_ADDR_P is true if we are running fwprop_addr rather than
917 the full fwprop. */
918
919static bool
920fwprop_insn (insn_info *insn, bool fwprop_addr_p)
921{
922 for (use_info *use : insn->uses ())
923 {
924 if (use->is_mem ())
925 continue;
926 /* ??? The choices here follow those in the pre-SSA code. */
927 if (!use->includes_address_uses ())
928 {
929 if (forward_propagate_into (use, fwprop_addr_p))
930 return true;
931 }
932 else
933 {
934 struct loop *loop = insn->bb ()->cfg_bb ()->loop_father;
935 /* The outermost loop is not really a loop. */
936 if (loop == NULL || loop_outer (loop) == NULL)
937 {
938 if (forward_propagate_into (use, fwprop_addr_p))
939 return true;
940 }
941 else if (fwprop_addr_p)
942 {
943 if (forward_propagate_into (use, false))
944 return true;
945 }
946 }
947 }
948 return false;
949}
a52b023a 950
a52b023a
PB
951/* Main entry point. */
952
953static bool
954gate_fwprop (void)
955{
956 return optimize > 0 && flag_forward_propagate;
957}
958
959static unsigned int
75da268e 960fwprop (bool fwprop_addr_p)
a52b023a 961{
a52b023a
PB
962 fwprop_init ();
963
0b76990a
RS
964 /* Go through all the instructions (including debug instructions) looking
965 for uses that we could propagate into.
a52b023a
PB
966
967 Do not forward propagate addresses into loops until after unrolling.
968 CSE did so because it was able to fix its own mess, but we are not. */
969
0b76990a 970 insn_info *next;
75da268e 971
0b76990a
RS
972 /* ??? This code uses a worklist in order to preserve the behavior
973 of the pre-SSA implementation. It would be better to instead
974 iterate on each instruction until no more propagations are
975 possible, then move on to the next. */
976 auto_vec<insn_info *> worklist;
977 for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)
978 {
979 next = insn->next_any_insn ();
980 if (insn->can_be_optimized () || insn->is_debug_insn ())
981 if (fwprop_insn (insn, fwprop_addr_p))
982 worklist.safe_push (insn);
983 }
984 for (unsigned int i = 0; i < worklist.length (); ++i)
985 {
986 insn_info *insn = worklist[i];
987 if (fwprop_insn (insn, fwprop_addr_p))
988 worklist.safe_push (insn);
a52b023a
PB
989 }
990
991 fwprop_done ();
a52b023a
PB
992 return 0;
993}
994
27a4cd48
DM
995namespace {
996
997const pass_data pass_data_rtl_fwprop =
a52b023a 998{
27a4cd48
DM
999 RTL_PASS, /* type */
1000 "fwprop1", /* name */
1001 OPTGROUP_NONE, /* optinfo_flags */
27a4cd48
DM
1002 TV_FWPROP, /* tv_id */
1003 0, /* properties_required */
1004 0, /* properties_provided */
1005 0, /* properties_destroyed */
1006 0, /* todo_flags_start */
3bea341f 1007 TODO_df_finish, /* todo_flags_finish */
a52b023a
PB
1008};
1009
27a4cd48
DM
1010class pass_rtl_fwprop : public rtl_opt_pass
1011{
1012public:
c3284718
RS
1013 pass_rtl_fwprop (gcc::context *ctxt)
1014 : rtl_opt_pass (pass_data_rtl_fwprop, ctxt)
27a4cd48
DM
1015 {}
1016
1017 /* opt_pass methods: */
725793af
DM
1018 bool gate (function *) final override { return gate_fwprop (); }
1019 unsigned int execute (function *) final override { return fwprop (false); }
27a4cd48
DM
1020
1021}; // class pass_rtl_fwprop
1022
1023} // anon namespace
1024
1025rtl_opt_pass *
1026make_pass_rtl_fwprop (gcc::context *ctxt)
1027{
1028 return new pass_rtl_fwprop (ctxt);
1029}
1030
27a4cd48
DM
1031namespace {
1032
1033const pass_data pass_data_rtl_fwprop_addr =
a52b023a 1034{
27a4cd48
DM
1035 RTL_PASS, /* type */
1036 "fwprop2", /* name */
1037 OPTGROUP_NONE, /* optinfo_flags */
27a4cd48
DM
1038 TV_FWPROP, /* tv_id */
1039 0, /* properties_required */
1040 0, /* properties_provided */
1041 0, /* properties_destroyed */
1042 0, /* todo_flags_start */
3bea341f 1043 TODO_df_finish, /* todo_flags_finish */
a52b023a 1044};
27a4cd48
DM
1045
1046class pass_rtl_fwprop_addr : public rtl_opt_pass
1047{
1048public:
c3284718
RS
1049 pass_rtl_fwprop_addr (gcc::context *ctxt)
1050 : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt)
27a4cd48
DM
1051 {}
1052
1053 /* opt_pass methods: */
725793af
DM
1054 bool gate (function *) final override { return gate_fwprop (); }
1055 unsigned int execute (function *) final override { return fwprop (true); }
27a4cd48
DM
1056
1057}; // class pass_rtl_fwprop_addr
1058
1059} // anon namespace
1060
1061rtl_opt_pass *
1062make_pass_rtl_fwprop_addr (gcc::context *ctxt)
1063{
1064 return new pass_rtl_fwprop_addr (ctxt);
1065}
This page took 6.677643 seconds and 5 git commands to generate.