This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[autovect] Vectorize hot SPEC gzip loop


This patch adds redundant cast elimination as part of forward propagation pass. After discussions last week, I understand that tree combiner pass will take care of it. Since someone has volunteered to implement it I will move forward with this small clean patch. When tree combiner is ready, we can get rid of this.

Now gzip loop is vectorized and I get 6% improvement on gzip (using single processor 1.8 Ghz G5).

2004-12-08 Devang Patel <dpatel@apple.com>

* tree-ssa-forwprop.c (cast_conversion_assignment_p): New.
(replace_use_in_cond_expr): New.
(all_uses_are_replacable): New.
(eliminate_unnecessary_casts): New.
(tree_ssa_forward_propagate_single_use_var): Eliminate unnecessary
casts.


2004-12-08 Devang Patel <dpatel@apple.com>

        * config/rs6000/rs6000.c (rs6000_emit_vector_select): Adjust
        vector select insn parameters.
-
Devang


Index: tree-ssa-forwprop.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/tree-ssa-forwprop.c,v retrieving revision 2.8 diff -Idpatel.pbxuser -c -3 -p -r2.8 tree-ssa-forwprop.c *** tree-ssa-forwprop.c 24 Sep 2004 13:26:29 -0000 2.8 --- tree-ssa-forwprop.c 8 Dec 2004 16:33:24 -0000 *************** substitute_single_use_vars (varray_type *** 479,484 **** --- 479,705 ---- } }

+ /* Return TRUE iff STMT is a MODIFY_EXPR of the form
+ x = (cast) y;
+ */
+ static bool cast_conversion_assignment_p (tree stmt)
+ {
+ tree lhs, rhs;
+
+ gcc_assert (stmt);
+
+ if (TREE_CODE (stmt) != MODIFY_EXPR)
+ return false;
+
+ lhs = TREE_OPERAND (stmt, 0);
+ rhs = TREE_OPERAND (stmt, 1);
+ if ((TREE_CODE (rhs) == NOP_EXPR
+ || TREE_CODE (rhs) == CONVERT_EXPR)
+ && TREE_CODE (TREE_OPERAND (rhs, 0)) == SSA_NAME
+ && TREE_CODE (lhs) == SSA_NAME)
+ return true;
+
+ return false;
+ }
+
+ /* STMT is a comparision and it uses DEF_STMT.
+ DEF_STMT is a modify expression that applys cast.
+ Return TRUE iff, it is OK to replace use of DEF_STMT by LHS's
+ inner type. If NEW_STMT is not NULL then */
+ static bool
+ replacable_use_in_cond_expr (tree stmt, tree def_stmt, tree *new_stmt)
+ {
+ tree op0, op1, candidate_op, other_op, temp, def_rhs, def_rhs_inner_type;
+
+ gcc_assert (stmt);
+ gcc_assert (def_stmt);
+ gcc_assert (COMPARISON_CLASS_P (stmt));
+ gcc_assert (TREE_CODE (def_stmt) == MODIFY_EXPR);
+
+ candidate_op = NULL_TREE;
+ other_op = NULL_TREE;
+
+ op0 = TREE_OPERAND (stmt, 0);
+ op1 = TREE_OPERAND (stmt, 1);
+
+ if (SSA_NAME_DEF_STMT (op0) == def_stmt
+ && is_gimple_min_invariant (op1))
+ {
+ candidate_op = op0;
+ other_op = op1;
+ }
+ else if (SSA_NAME_DEF_STMT (op1) == def_stmt
+ && is_gimple_min_invariant (op0))
+ {
+ candidate_op = op1;
+ other_op = op0;
+ }
+ else
+ return false;
+
+ if (!cast_conversion_assignment_p (def_stmt))
+ return false;
+
+ /* What we want to prove is that if we convert CANDIDATE_OP to
+ the type of the object inside the NOP_EXPR that the
+ result is still equivalent to SRC. */
+ def_rhs = TREE_OPERAND (def_stmt, 1);
+ def_rhs_inner_type = TREE_TYPE (TREE_OPERAND (def_rhs, 0));
+ temp = build1 (TREE_CODE (def_rhs), def_rhs_inner_type, other_op);
+ temp = fold (temp);
+ if (is_gimple_val (temp) && tree_int_cst_equal (temp, other_op))
+ {
+ if (new_stmt)
+ *new_stmt = build (TREE_CODE (stmt), TREE_TYPE (stmt),
+ TREE_OPERAND (def_rhs, 0), temp);
+
+ return true;
+ }
+ return false;
+ }
+
+ /* Return TRUE iff all uses of STMT are candidate for replacement. */
+ static bool
+ all_uses_are_replacable (tree stmt, bool replace)
+ {
+ dataflow_t df;
+ int j, num_uses;
+ bool replacable = true;
+
+ /* Now compute the immediate uses of TEST_VAR. */
+ df = get_immediate_uses (stmt);
+ num_uses = num_immediate_uses (df);
+
+ for (j = 0; j < num_uses && replacable; j++)
+ {
+ tree use = immediate_use (df, j);
+
+ if (replace && dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, " Removing casts");
+ print_generic_expr (dump_file, use, dump_flags);
+ fprintf (dump_file, "\n");
+ }
+
+ if (TREE_CODE (use) == MODIFY_EXPR)
+ {
+ replacable = cast_conversion_assignment_p (use);
+ if (replace)
+ {
+ /* Before
+
+ STMT: a = (cast) b;
+ USE: c = (undo_cast) a;
+
+ After
+
+ USE: c = b;
+ */
+ tree def_rhs = TREE_OPERAND (stmt, 1);
+ tree def_rhs_inner = TREE_OPERAND (def_rhs, 0);
+ tree use_lhs = TREE_OPERAND (use, 0);
+
+ tree def_rhs_inner_type = TREE_TYPE (def_rhs_inner);
+ tree use_lhs_type = TREE_TYPE (use_lhs);
+
+ if ((TYPE_PRECISION (def_rhs_inner_type)
+ == TYPE_PRECISION (use_lhs_type))
+ && (TYPE_MAIN_VARIANT (def_rhs_inner_type)
+ == TYPE_MAIN_VARIANT (use_lhs_type)))
+ {
+ TREE_OPERAND (use, 1) = TREE_OPERAND (def_rhs, 0);
+ modify_stmt (use);
+ }
+ }
+ }
+ else if (TREE_CODE (use) == COND_EXPR
+ && COMPARISON_CLASS_P (COND_EXPR_COND (use)))
+ {
+ if (replace)
+ {
+ tree new_cond = NULL_TREE;
+ replacable = replacable_use_in_cond_expr (COND_EXPR_COND (use),
+ stmt, &new_cond);
+ if (new_cond)
+ {
+ COND_EXPR_COND (use) = new_cond;
+ modify_stmt (use);
+ }
+ else
+ abort ();
+ }
+ else
+ replacable = replacable_use_in_cond_expr (COND_EXPR_COND (use),
+ stmt, NULL);
+ }
+ else
+ replacable = false;
+ }
+
+ return replacable;
+ }
+
+ static void
+ eliminate_unnecessary_casts (void)
+ {
+ basic_block bb;
+ block_stmt_iterator bsi;
+ varray_type worklist;
+
+ /* Memory allocation. */
+ vars = BITMAP_XMALLOC ();
+ VARRAY_TREE_INIT (worklist, 10, "worklist");
+ FOR_EACH_BB (bb)
+ {
+ for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
+ {
+ tree stmt = bsi_stmt (bsi);
+
+ /* If stmt is of the form
+ x = (cast) y;
+ then make x candidate. */
+
+ if (cast_conversion_assignment_p (stmt))
+ {
+ tree lhs = TREE_OPERAND (stmt, 0);
+ tree rhs = TREE_OPERAND (stmt, 1);
+ tree rhs_inner = TREE_OPERAND (rhs, 0);
+ tree rhs_type = TREE_TYPE (rhs);
+ tree rhs_inner_type = TREE_TYPE (rhs_inner);
+
+ if ((TYPE_PRECISION (rhs_inner_type) <= TYPE_PRECISION (rhs_type))
+ && (TYPE_UNSIGNED (rhs_inner_type) == TYPE_UNSIGNED (rhs_type)))
+ {
+ bitmap_set_bit (vars, SSA_NAME_VERSION (lhs));
+ VARRAY_PUSH_TREE (worklist, stmt);
+ }
+ }
+ }
+ }
+
+ /* Now compute immidiate uses for candidates. */
+ compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
+ while (VARRAY_ACTIVE_SIZE (worklist) > 0)
+ {
+ tree stmt = VARRAY_TOP_TREE (worklist);
+ VARRAY_POP (worklist);
+
+ if (all_uses_are_replacable (stmt, false /* Do not replace */))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,"File name = %s\n", __FILE__);
+ fprintf (dump_file,"Input name = %s\n", main_input_filename);
+ }
+ all_uses_are_replacable (stmt, true /* Replace */);
+ }
+
+ }
+ /* Cleanup */
+ free_df ();
+ BITMAP_XFREE (vars);
+ }
+
/* Main entry point for the forward propagation optimizer. */


  static void
*************** tree_ssa_forward_propagate_single_use_va
*** 487,492 ****
--- 708,715 ----
    basic_block bb;
    varray_type vars_worklist, cond_worklist;

+ eliminate_unnecessary_casts ();
+
vars = BITMAP_XMALLOC ();
VARRAY_TREE_INIT (vars_worklist, 10, "VARS worklist");
VARRAY_TREE_INIT (cond_worklist, 10, "COND worklist");
Index: config/rs6000/rs6000.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000.c,v
retrieving revision 1.739.2.1
diff -Idpatel.pbxuser -c -3 -p -r1.739.2.1 rs6000.c
*** config/rs6000/rs6000.c 6 Dec 2004 21:36:07 -0000 1.739.2.1
--- config/rs6000/rs6000.c 8 Dec 2004 16:33:25 -0000
*************** rs6000_emit_vector_select (rtx dest, rtx
*** 11549,11555 ****


    t = gen_rtx_fmt_ee (SET, VOIDmode, temp,
                      gen_rtx_fmt_Ei (UNSPEC, dest_mode,
!                                     gen_rtvec (3, op1, op2, mask),
                                      vsel_insn_index));
    emit_insn (t);
    emit_move_insn (dest, temp);
--- 11549,11555 ----

    t = gen_rtx_fmt_ee (SET, VOIDmode, temp,
                      gen_rtx_fmt_Ei (UNSPEC, dest_mode,
!                                     gen_rtvec (3, op2, op1, mask),
                                      vsel_insn_index));
    emit_insn (t);
    emit_move_insn (dest, temp);


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]