[autovect] Vectorize hot SPEC gzip loop

Devang Patel dpatel@apple.com
Wed Dec 8 16:42:00 GMT 2004


This patch adds redundant cast elimination as part of forward 
propagation pass. After discussions last week, I understand that tree 
combiner pass will take care of it. Since someone has volunteered to 
implement it I will move forward with this small clean patch. When tree 
combiner is ready, we can get rid of this.

Now gzip loop is vectorized and I get 6% improvement on gzip (using 
single processor 1.8 Ghz G5).

2004-12-08 Devang Patel  <dpatel@apple.com>

         * tree-ssa-forwprop.c (cast_conversion_assignment_p): New.
         (replace_use_in_cond_expr): New.
         (all_uses_are_replacable): New.
         (eliminate_unnecessary_casts): New.
         (tree_ssa_forward_propagate_single_use_var): Eliminate 
unnecessary
         casts.

2004-12-08 Devang Patel  <dpatel@apple.com>

         * config/rs6000/rs6000.c (rs6000_emit_vector_select): Adjust
         vector select insn parameters.
-
Devang


Index: tree-ssa-forwprop.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-ssa-forwprop.c,v
retrieving revision 2.8
diff -Idpatel.pbxuser -c -3 -p -r2.8 tree-ssa-forwprop.c
*** tree-ssa-forwprop.c 24 Sep 2004 13:26:29 -0000      2.8
--- tree-ssa-forwprop.c 8 Dec 2004 16:33:24 -0000
*************** substitute_single_use_vars (varray_type
*** 479,484 ****
--- 479,705 ----
       }
   }

+ /* Return TRUE iff STMT is a MODIFY_EXPR of the form
+    x = (cast) y;
+ */
+ static bool cast_conversion_assignment_p (tree stmt)
+ {
+   tree lhs, rhs;
+
+   gcc_assert (stmt);
+
+   if (TREE_CODE (stmt) != MODIFY_EXPR)
+     return false;
+
+   lhs = TREE_OPERAND (stmt, 0);
+   rhs = TREE_OPERAND (stmt, 1);
+   if ((TREE_CODE (rhs) == NOP_EXPR
+        || TREE_CODE (rhs) == CONVERT_EXPR)
+       && TREE_CODE (TREE_OPERAND (rhs, 0)) == SSA_NAME
+       && TREE_CODE (lhs) == SSA_NAME)
+     return true;
+
+   return false;
+ }
+
+ /* STMT is a comparision and it uses DEF_STMT.
+    DEF_STMT is a modify expression that applys cast.
+    Return TRUE iff, it is OK to replace use of DEF_STMT by LHS's
+    inner type.  If NEW_STMT is not NULL then */
+ static bool
+ replacable_use_in_cond_expr (tree stmt, tree def_stmt, tree *new_stmt)
+ {
+   tree op0, op1, candidate_op, other_op, temp, def_rhs, 
def_rhs_inner_type;
+
+   gcc_assert (stmt);
+   gcc_assert (def_stmt);
+   gcc_assert (COMPARISON_CLASS_P (stmt));
+   gcc_assert (TREE_CODE (def_stmt) == MODIFY_EXPR);
+
+   candidate_op = NULL_TREE;
+   other_op = NULL_TREE;
+
+   op0 = TREE_OPERAND (stmt, 0);
+   op1 = TREE_OPERAND (stmt, 1);
+
+   if (SSA_NAME_DEF_STMT (op0) == def_stmt
+       && is_gimple_min_invariant (op1))
+     {
+       candidate_op = op0;
+       other_op = op1;
+     }
+   else if (SSA_NAME_DEF_STMT (op1) == def_stmt
+          && is_gimple_min_invariant (op0))
+     {
+       candidate_op = op1;
+       other_op = op0;
+     }
+   else
+     return false;
+
+   if (!cast_conversion_assignment_p (def_stmt))
+     return false;
+
+   /* What we want to prove is that if we convert CANDIDATE_OP to
+      the type of the object inside the NOP_EXPR that the
+      result is still equivalent to SRC.  */
+   def_rhs = TREE_OPERAND (def_stmt, 1);
+   def_rhs_inner_type = TREE_TYPE (TREE_OPERAND (def_rhs, 0));
+   temp = build1 (TREE_CODE (def_rhs), def_rhs_inner_type, other_op);
+   temp = fold (temp);
+   if (is_gimple_val (temp) && tree_int_cst_equal (temp, other_op))
+     {
+       if (new_stmt)
+       *new_stmt = build (TREE_CODE (stmt), TREE_TYPE (stmt),
+                          TREE_OPERAND (def_rhs, 0), temp);
+
+       return true;
+     }
+   return false;
+ }
+
+ /* Return TRUE iff all uses of STMT are candidate for replacement.  */
+ static bool
+ all_uses_are_replacable (tree stmt, bool replace)
+ {
+   dataflow_t df;
+   int j, num_uses;
+   bool replacable = true;
+
+   /* Now compute the immediate uses of TEST_VAR.  */
+   df = get_immediate_uses (stmt);
+   num_uses = num_immediate_uses (df);
+
+   for (j = 0; j < num_uses && replacable; j++)
+     {
+       tree use = immediate_use (df, j);
+
+       if (replace && dump_file && (dump_flags & TDF_DETAILS))
+       {
+         fprintf (dump_file, "  Removing casts");
+         print_generic_expr (dump_file, use, dump_flags);
+         fprintf (dump_file, "\n");
+       }
+
+       if (TREE_CODE (use) == MODIFY_EXPR)
+       {
+         replacable = cast_conversion_assignment_p (use);
+         if (replace)
+           {
+             /* Before
+
+                STMT: a = (cast) b;
+                USE: c = (undo_cast) a;
+
+                After
+
+                USE: c = b;
+             */
+             tree def_rhs = TREE_OPERAND (stmt, 1);
+             tree def_rhs_inner = TREE_OPERAND (def_rhs, 0);
+             tree use_lhs = TREE_OPERAND (use, 0);
+
+             tree def_rhs_inner_type = TREE_TYPE (def_rhs_inner);
+             tree use_lhs_type = TREE_TYPE (use_lhs);
+
+             if ((TYPE_PRECISION (def_rhs_inner_type)
+                  == TYPE_PRECISION (use_lhs_type))
+                  && (TYPE_MAIN_VARIANT (def_rhs_inner_type)
+                      == TYPE_MAIN_VARIANT (use_lhs_type)))
+             {
+               TREE_OPERAND (use, 1) = TREE_OPERAND (def_rhs, 0);
+               modify_stmt (use);
+             }
+           }
+       }
+       else if (TREE_CODE (use) == COND_EXPR
+              && COMPARISON_CLASS_P (COND_EXPR_COND (use)))
+       {
+         if (replace)
+           {
+             tree new_cond = NULL_TREE;
+             replacable = replacable_use_in_cond_expr (COND_EXPR_COND 
(use),
+                                                       stmt, 
&new_cond);
+             if (new_cond)
+               {
+                 COND_EXPR_COND (use) = new_cond;
+                 modify_stmt (use);
+               }
+             else
+               abort ();
+           }
+         else
+           replacable = replacable_use_in_cond_expr (COND_EXPR_COND 
(use),
+                                                     stmt, NULL);
+       }
+       else
+       replacable = false;
+     }
+
+   return  replacable;
+ }
+
+ static void
+ eliminate_unnecessary_casts (void)
+ {
+   basic_block bb;
+   block_stmt_iterator bsi;
+   varray_type worklist;
+
+   /* Memory allocation.  */
+   vars = BITMAP_XMALLOC ();
+   VARRAY_TREE_INIT (worklist, 10, "worklist");
+   FOR_EACH_BB (bb)
+     {
+       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
+       {
+         tree stmt = bsi_stmt (bsi);
+
+         /* If stmt is of the form
+              x = (cast) y;
+            then make x candidate.  */
+
+         if (cast_conversion_assignment_p (stmt))
+           {
+             tree lhs = TREE_OPERAND (stmt, 0);
+             tree rhs = TREE_OPERAND (stmt, 1);
+             tree rhs_inner = TREE_OPERAND (rhs, 0);
+             tree rhs_type = TREE_TYPE (rhs);
+             tree rhs_inner_type = TREE_TYPE (rhs_inner);
+
+             if ((TYPE_PRECISION (rhs_inner_type) <= TYPE_PRECISION 
(rhs_type))
+                 && (TYPE_UNSIGNED (rhs_inner_type) == TYPE_UNSIGNED 
(rhs_type)))
+               {
+                 bitmap_set_bit (vars, SSA_NAME_VERSION (lhs));
+                 VARRAY_PUSH_TREE (worklist, stmt);
+               }
+           }
+       }
+     }
+
+   /* Now compute immidiate uses for candidates.  */
+   compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
+   while (VARRAY_ACTIVE_SIZE (worklist) > 0)
+     {
+       tree stmt = VARRAY_TOP_TREE (worklist);
+       VARRAY_POP (worklist);
+
+       if (all_uses_are_replacable (stmt, false /* Do not replace */))
+         {
+         if (dump_file && (dump_flags & TDF_DETAILS))
+             {
+           fprintf (dump_file,"File name = %s\n", __FILE__);
+           fprintf (dump_file,"Input name = %s\n", 
main_input_filename);
+             }
+         all_uses_are_replacable (stmt, true /* Replace */);
+         }
+
+     }
+   /* Cleanup */
+   free_df ();
+   BITMAP_XFREE (vars);
+ }
+
   /* Main entry point for the forward propagation optimizer.  */

   static void
*************** tree_ssa_forward_propagate_single_use_va
*** 487,492 ****
--- 708,715 ----
     basic_block bb;
     varray_type vars_worklist, cond_worklist;

+   eliminate_unnecessary_casts ();
+
     vars = BITMAP_XMALLOC ();
     VARRAY_TREE_INIT (vars_worklist, 10, "VARS worklist");
     VARRAY_TREE_INIT (cond_worklist, 10, "COND worklist");
Index: config/rs6000/rs6000.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000.c,v
retrieving revision 1.739.2.1
diff -Idpatel.pbxuser -c -3 -p -r1.739.2.1 rs6000.c
*** config/rs6000/rs6000.c      6 Dec 2004 21:36:07 -0000       
1.739.2.1
--- config/rs6000/rs6000.c      8 Dec 2004 16:33:25 -0000
*************** rs6000_emit_vector_select (rtx dest, rtx
*** 11549,11555 ****

     t = gen_rtx_fmt_ee (SET, VOIDmode, temp,
                       gen_rtx_fmt_Ei (UNSPEC, dest_mode,
!                                     gen_rtvec (3, op1, op2, mask),
                                       vsel_insn_index));
     emit_insn (t);
     emit_move_insn (dest, temp);
--- 11549,11555 ----

     t = gen_rtx_fmt_ee (SET, VOIDmode, temp,
                       gen_rtx_fmt_Ei (UNSPEC, dest_mode,
!                                     gen_rtvec (3, op2, op1, mask),
                                       vsel_insn_index));
     emit_insn (t);
     emit_move_insn (dest, temp);



More information about the Gcc-patches mailing list