From: Richard Sandiford Date: Wed, 3 Jul 2024 08:17:42 +0000 (+0100) Subject: Give fast DCE a separate dirty flag X-Git-Url: https://gcc.gnu.org/git/?a=commitdiff_plain;h=47ea6bddd15a568cedc5d7026d2cc9d5599e6e01;p=gcc.git Give fast DCE a separate dirty flag Thomas pointed out that we sometimes failed to eliminate some dead code (specifically clobbers of otherwise unused registers) on nvptx when late-combine is enabled. This happens because: - combine is able to optimise the function in a way that exposes dead code. This leaves the df information in a "dirty" state. - late_combine calls df_analyze without DF_LR_RUN_DCE run set. This updates the df information and clears the "dirty" state. - late_combine doesn't find any extra optimisations, and so leaves the df information up-to-date. - if_after_combine (ce2) calls df_analyze with DF_LR_RUN_DCE set. Because the df information is already up-to-date, fast DCE is not run. The upshot is that running late-combine has the effect of suppressing a DCE opportunity that would have been noticed without late_combine. I think this shows that we should track the state of the DCE separately from the LR problem. Every pass updates the latter, but not all passes update the former. gcc/ * df.h (DF_LR_DCE): New df_problem_id. (df_lr_dce): New macro. * df-core.cc (rest_of_handle_df_finish): Check for a null free_fun. * df-problems.cc (df_lr_finalize): Split out fast DCE handling to... (df_lr_dce_finalize): ...this new function. (problem_LR_DCE): New df_problem. (df_lr_add_problem): Register LR_DCE rather than LR itself. * dce.cc (fast_dce): Clear df_lr_dce->solutions_dirty. --- diff --git a/gcc/dce.cc b/gcc/dce.cc index be1a2a877324..04e8d98818de 100644 --- a/gcc/dce.cc +++ b/gcc/dce.cc @@ -1182,6 +1182,9 @@ fast_dce (bool word_level) BITMAP_FREE (processed); BITMAP_FREE (redo_out); BITMAP_FREE (all_blocks); + + /* Both forms of DCE should make further DCE unnecessary. */ + df_lr_dce->solutions_dirty = false; } diff --git a/gcc/df-core.cc b/gcc/df-core.cc index b0e8a88d433b..8fd778a86183 100644 --- a/gcc/df-core.cc +++ b/gcc/df-core.cc @@ -806,7 +806,8 @@ rest_of_handle_df_finish (void) for (i = 0; i < df->num_problems_defined; i++) { struct dataflow *dflow = df->problems_in_order[i]; - dflow->problem->free_fun (); + if (dflow->problem->free_fun) + dflow->problem->free_fun (); } free (df->postorder); diff --git a/gcc/df-problems.cc b/gcc/df-problems.cc index 88ee0dd67fc3..bfd24bd1e86c 100644 --- a/gcc/df-problems.cc +++ b/gcc/df-problems.cc @@ -1054,37 +1054,10 @@ df_lr_transfer_function (int bb_index) } -/* Run the fast dce as a side effect of building LR. */ - static void -df_lr_finalize (bitmap all_blocks) +df_lr_finalize (bitmap) { df_lr->solutions_dirty = false; - if (df->changeable_flags & DF_LR_RUN_DCE) - { - run_fast_df_dce (); - - /* If dce deletes some instructions, we need to recompute the lr - solution before proceeding further. The problem is that fast - dce is a pessimestic dataflow algorithm. In the case where - it deletes a statement S inside of a loop, the uses inside of - S may not be deleted from the dataflow solution because they - were carried around the loop. While it is conservatively - correct to leave these extra bits, the standards of df - require that we maintain the best possible (least fixed - point) solution. The only way to do that is to redo the - iteration from the beginning. See PR35805 for an - example. */ - if (df_lr->solutions_dirty) - { - df_clear_flags (DF_LR_RUN_DCE); - df_lr_alloc (all_blocks); - df_lr_local_compute (all_blocks); - df_worklist_dataflow (df_lr, all_blocks, df->postorder, df->n_blocks); - df_lr_finalize (all_blocks); - df_set_flags (DF_LR_RUN_DCE); - } - } } @@ -1266,6 +1239,69 @@ static const struct df_problem problem_LR = false /* Reset blocks on dropping out of blocks_to_analyze. */ }; +/* Run the fast DCE after building LR. This is a separate problem so that + the "dirty" flag is only cleared after a DCE pass is actually run. */ + +static void +df_lr_dce_finalize (bitmap all_blocks) +{ + if (!(df->changeable_flags & DF_LR_RUN_DCE)) + return; + + /* Also clears df_lr_dce->solutions_dirty. */ + run_fast_df_dce (); + + /* If dce deletes some instructions, we need to recompute the lr + solution before proceeding further. The problem is that fast + dce is a pessimestic dataflow algorithm. In the case where + it deletes a statement S inside of a loop, the uses inside of + S may not be deleted from the dataflow solution because they + were carried around the loop. While it is conservatively + correct to leave these extra bits, the standards of df + require that we maintain the best possible (least fixed + point) solution. The only way to do that is to redo the + iteration from the beginning. See PR35805 for an + example. */ + if (df_lr->solutions_dirty) + { + df_clear_flags (DF_LR_RUN_DCE); + df_lr_alloc (all_blocks); + df_lr_local_compute (all_blocks); + df_worklist_dataflow (df_lr, all_blocks, df->postorder, df->n_blocks); + df_lr_finalize (all_blocks); + df_set_flags (DF_LR_RUN_DCE); + } +} + +static const struct df_problem problem_LR_DCE +{ + DF_LR_DCE, /* Problem id. */ + DF_BACKWARD, /* Direction (arbitrary). */ + NULL, /* Allocate the problem specific data. */ + NULL, /* Reset global information. */ + NULL, /* Free basic block info. */ + NULL, /* Local compute function. */ + NULL, /* Init the solution specific data. */ + NULL, /* Worklist solver. */ + NULL, /* Confluence operator 0. */ + NULL, /* Confluence operator n. */ + NULL, /* Transfer function. */ + df_lr_dce_finalize, /* Finalize function. */ + NULL, /* Free all of the problem information. */ + NULL, /* Remove this problem from the stack of dataflow problems. */ + NULL, /* Debugging. */ + NULL, /* Debugging start block. */ + NULL, /* Debugging end block. */ + NULL, /* Debugging start insn. */ + NULL, /* Debugging end insn. */ + NULL, /* Incremental solution verify start. */ + NULL, /* Incremental solution verify end. */ + &problem_LR, /* Dependent problem. */ + 0, /* Size of entry of block_info array. */ + TV_DF_LR, /* Timing variable. */ + false /* Reset blocks on dropping out of blocks_to_analyze. */ +}; + /* Create a new DATAFLOW instance and add it to an existing instance of DF. The returned structure is what is used to get at the @@ -1274,7 +1310,9 @@ static const struct df_problem problem_LR = void df_lr_add_problem (void) { - df_add_problem (&problem_LR); + /* Also add the fast DCE problem. It is then conditionally enabled by + the DF_LR_RUN_DCE flag. */ + df_add_problem (&problem_LR_DCE); /* These will be initialized when df_scan_blocks processes each block. */ df_lr->out_of_date_transfer_functions = BITMAP_ALLOC (&df_bitmap_obstack); diff --git a/gcc/df.h b/gcc/df.h index c4e690b40cf2..bbb4f297b47d 100644 --- a/gcc/df.h +++ b/gcc/df.h @@ -47,6 +47,7 @@ enum df_problem_id { DF_SCAN, DF_LR, /* Live Registers backward. */ + DF_LR_DCE, /* Dead code elimination post-pass for LR. */ DF_LIVE, /* Live Registers & Uninitialized Registers */ DF_RD, /* Reaching Defs. */ DF_CHAIN, /* Def-Use and/or Use-Def Chains. */ @@ -940,6 +941,7 @@ extern class df_d *df; #define df_scan (df->problems_by_index[DF_SCAN]) #define df_rd (df->problems_by_index[DF_RD]) #define df_lr (df->problems_by_index[DF_LR]) +#define df_lr_dce (df->problems_by_index[DF_LR_DCE]) #define df_live (df->problems_by_index[DF_LIVE]) #define df_chain (df->problems_by_index[DF_CHAIN]) #define df_word_lr (df->problems_by_index[DF_WORD_LR])