View | Details | Return to bug 100089
Collapse All | Expand All

(-)a/gcc/tree-vect-slp.c (-14 / +56 lines)
Lines 5287-5293 li_cost_vec_cmp (const void *a_, const void *b_) Link Here
5287
5287
5288
static bool
5288
static bool
5289
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo,
5289
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo,
5290
				    vec<slp_instance> slp_instances)
5290
				    vec<slp_instance> slp_instances,
5291
				    loop_p orig_loop)
5291
{
5292
{
5292
  slp_instance instance;
5293
  slp_instance instance;
5293
  int i;
5294
  int i;
Lines 5324-5329 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo, Link Here
5324
      vector_costs.safe_splice (instance->cost_vec);
5325
      vector_costs.safe_splice (instance->cost_vec);
5325
      instance->cost_vec.release ();
5326
      instance->cost_vec.release ();
5326
    }
5327
    }
5328
  /* When we're vectorizing an if-converted loop body with the
5329
     very-cheap cost model, make sure we vectorized all if-converted
5330
     code.  */
5331
  bool force_not_profitable = false;
5332
  if (orig_loop && flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP)
5333
    {
5334
      gcc_assert (bb_vinfo->bbs.length () == 1);
5335
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[0]);
5336
	   !gsi_end_p (gsi); gsi_next (&gsi))
5337
	{
5338
	  /* The costing above left us with DCEable vectorized scalar
5339
	     stmts having the visited flag set.  */
5340
	  if (gimple_visited_p (gsi_stmt (gsi)))
5341
	    continue;
5342
5343
	  if (gassign *ass = dyn_cast <gassign *> (gsi_stmt (gsi)))
5344
	    if (gimple_assign_rhs_code (ass) == COND_EXPR)
5345
	      {
5346
		force_not_profitable = true;
5347
		break;
5348
	      }
5349
	}
5350
    }
5351
5327
  /* Unset visited flag.  */
5352
  /* Unset visited flag.  */
5328
  stmt_info_for_cost *cost;
5353
  stmt_info_for_cost *cost;
5329
  FOR_EACH_VEC_ELT (scalar_costs, i, cost)
5354
  FOR_EACH_VEC_ELT (scalar_costs, i, cost)
Lines 5448-5456 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo, Link Here
5448
      return false;
5473
      return false;
5449
    }
5474
    }
5450
5475
5476
  if (dump_enabled_p () && force_not_profitable)
5477
    dump_printf_loc (MSG_NOTE, vect_location,
5478
		     "not profitable because of unprofitable if-converted "
5479
		     "scalar code\n");
5480
5451
  scalar_costs.release ();
5481
  scalar_costs.release ();
5452
  vector_costs.release ();
5482
  vector_costs.release ();
5453
  return true;
5483
  return !force_not_profitable;
5454
}
5484
}
5455
5485
5456
/* qsort comparator for lane defs.  */
5486
/* qsort comparator for lane defs.  */
Lines 5895-5901 vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal, Link Here
5895
5925
5896
static bool
5926
static bool
5897
vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
5927
vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
5898
		 vec<int> *dataref_groups, unsigned int n_stmts)
5928
		 vec<int> *dataref_groups, unsigned int n_stmts,
5929
		 loop_p orig_loop)
5899
{
5930
{
5900
  bb_vec_info bb_vinfo;
5931
  bb_vec_info bb_vinfo;
5901
  auto_vector_modes vector_modes;
5932
  auto_vector_modes vector_modes;
Lines 5944-5950 vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs, Link Here
5944
	      vect_location = instance->location ();
5975
	      vect_location = instance->location ();
5945
	      if (!unlimited_cost_model (NULL)
5976
	      if (!unlimited_cost_model (NULL)
5946
		  && !vect_bb_vectorization_profitable_p
5977
		  && !vect_bb_vectorization_profitable_p
5947
			(bb_vinfo, instance->subgraph_entries))
5978
			(bb_vinfo,
5979
			 orig_loop ? BB_VINFO_SLP_INSTANCES (bb_vinfo)
5980
			 : instance->subgraph_entries, orig_loop))
5948
		{
5981
		{
5949
		  for (slp_instance inst : instance->subgraph_entries)
5982
		  for (slp_instance inst : instance->subgraph_entries)
5950
		    if (inst->kind == slp_inst_kind_bb_reduc)
5983
		    if (inst->kind == slp_inst_kind_bb_reduc)
Lines 5965-5971 vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs, Link Here
5965
				 "using SLP\n");
5998
				 "using SLP\n");
5966
	      vectorized = true;
5999
	      vectorized = true;
5967
6000
5968
	      vect_schedule_slp (bb_vinfo, instance->subgraph_entries);
6001
	      vect_schedule_slp (bb_vinfo,
6002
				 orig_loop ? BB_VINFO_SLP_INSTANCES (bb_vinfo)
6003
				 : instance->subgraph_entries);
5969
6004
5970
	      unsigned HOST_WIDE_INT bytes;
6005
	      unsigned HOST_WIDE_INT bytes;
5971
	      if (dump_enabled_p ())
6006
	      if (dump_enabled_p ())
Lines 5980-5985 vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs, Link Here
5980
				     "basic block part vectorized using "
6015
				     "basic block part vectorized using "
5981
				     "variable length vectors\n");
6016
				     "variable length vectors\n");
5982
		}
6017
		}
6018
6019
	      /* When we're called from loop vectorization we're considering
6020
		 all subgraphs at once.  */
6021
	      if (orig_loop)
6022
		break;
5983
	    }
6023
	    }
5984
	}
6024
	}
5985
      else
6025
      else
Lines 6047-6053 vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs, Link Here
6047
   true if anything in the basic-block was vectorized.  */
6087
   true if anything in the basic-block was vectorized.  */
6048
6088
6049
static bool
6089
static bool
6050
vect_slp_bbs (const vec<basic_block> &bbs)
6090
vect_slp_bbs (const vec<basic_block> &bbs, loop_p orig_loop)
6051
{
6091
{
6052
  vec<data_reference_p> datarefs = vNULL;
6092
  vec<data_reference_p> datarefs = vNULL;
6053
  auto_vec<int> dataref_groups;
6093
  auto_vec<int> dataref_groups;
Lines 6077-6094 vect_slp_bbs (const vec<basic_block> &bbs) Link Here
6077
      ++current_group;
6117
      ++current_group;
6078
    }
6118
    }
6079
6119
6080
  return vect_slp_region (bbs, datarefs, &dataref_groups, insns);
6120
  return vect_slp_region (bbs, datarefs, &dataref_groups, insns, orig_loop);
6081
}
6121
}
6082
6122
6083
/* Main entry for the BB vectorizer.  Analyze and transform BB, returns
6123
/* Special entry for the BB vectorizer.  Analyze and transform a single
6084
   true if anything in the basic-block was vectorized.  */
6124
   if-converted BB with ORIG_LOOP's body being the non-if-converted
6125
   representation.  Returns true if anything in the basic-block was
6126
   vectorized.  */
6085
6127
6086
bool
6128
bool
6087
vect_slp_bb (basic_block bb)
6129
vect_slp_if_converted_bb (basic_block bb, loop_p orig_loop)
6088
{
6130
{
6089
  auto_vec<basic_block> bbs;
6131
  auto_vec<basic_block> bbs;
6090
  bbs.safe_push (bb);
6132
  bbs.safe_push (bb);
6091
  return vect_slp_bbs (bbs);
6133
  return vect_slp_bbs (bbs, orig_loop);
6092
}
6134
}
6093
6135
6094
/* Main entry for the BB vectorizer.  Analyze and transform BB, returns
6136
/* Main entry for the BB vectorizer.  Analyze and transform BB, returns
Lines 6139-6145 vect_slp_function (function *fun) Link Here
6139
6181
6140
      if (split && !bbs.is_empty ())
6182
      if (split && !bbs.is_empty ())
6141
	{
6183
	{
6142
	  r |= vect_slp_bbs (bbs);
6184
	  r |= vect_slp_bbs (bbs, NULL);
6143
	  bbs.truncate (0);
6185
	  bbs.truncate (0);
6144
	  bbs.quick_push (bb);
6186
	  bbs.quick_push (bb);
6145
	}
6187
	}
Lines 6157-6169 vect_slp_function (function *fun) Link Here
6157
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6199
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6158
			       "splitting region at control altering "
6200
			       "splitting region at control altering "
6159
			       "definition %G", last);
6201
			       "definition %G", last);
6160
	    r |= vect_slp_bbs (bbs);
6202
	    r |= vect_slp_bbs (bbs, NULL);
6161
	    bbs.truncate (0);
6203
	    bbs.truncate (0);
6162
	  }
6204
	  }
6163
    }
6205
    }
6164
6206
6165
  if (!bbs.is_empty ())
6207
  if (!bbs.is_empty ())
6166
    r |= vect_slp_bbs (bbs);
6208
    r |= vect_slp_bbs (bbs, NULL);
6167
6209
6168
  free (rpo);
6210
  free (rpo);
6169
6211
(-)a/gcc/tree-vectorizer.c (-9 / +11 lines)
Lines 1033-1042 try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab, Link Here
1033
	 only non-if-converted parts took part in BB vectorization.  */
1033
	 only non-if-converted parts took part in BB vectorization.  */
1034
      if (flag_tree_slp_vectorize != 0
1034
      if (flag_tree_slp_vectorize != 0
1035
	  && loop_vectorized_call
1035
	  && loop_vectorized_call
1036
	  && ! loop->inner
1036
	  && ! loop->inner)
1037
	  /* This would purely be a workaround and should be removed
1038
	     once PR100089 is fixed.  */
1039
	  && flag_vect_cost_model != VECT_COST_MODEL_VERY_CHEAP)
1040
	{
1037
	{
1041
	  basic_block bb = loop->header;
1038
	  basic_block bb = loop->header;
1042
	  bool require_loop_vectorize = false;
1039
	  bool require_loop_vectorize = false;
Lines 1062-1073 try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab, Link Here
1062
	      gimple_set_uid (stmt, -1);
1059
	      gimple_set_uid (stmt, -1);
1063
	      gimple_set_visited (stmt, false);
1060
	      gimple_set_visited (stmt, false);
1064
	    }
1061
	    }
1065
	  if (!require_loop_vectorize && vect_slp_bb (bb))
1062
	  if (!require_loop_vectorize)
1066
	    {
1063
	    {
1067
	      fold_loop_internal_call (loop_vectorized_call,
1064
	      tree arg = gimple_call_arg (loop_vectorized_call, 1);
1068
				       boolean_true_node);
1065
	      class loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg));
1069
	      loop_vectorized_call = NULL;
1066
	      if (vect_slp_if_converted_bb (bb, scalar_loop))
1070
	      ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
1067
		{
1068
		  fold_loop_internal_call (loop_vectorized_call,
1069
					   boolean_true_node);
1070
		  loop_vectorized_call = NULL;
1071
		  ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
1072
		}
1071
	    }
1073
	    }
1072
	}
1074
	}
1073
      /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
1075
      /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
(-)a/gcc/tree-vectorizer.h (-2 / +1 lines)
Lines 2087-2093 extern void vect_gather_slp_loads (vec_info *); Link Here
2087
extern void vect_get_slp_defs (slp_tree, vec<tree> *);
2087
extern void vect_get_slp_defs (slp_tree, vec<tree> *);
2088
extern void vect_get_slp_defs (vec_info *, slp_tree, vec<vec<tree> > *,
2088
extern void vect_get_slp_defs (vec_info *, slp_tree, vec<vec<tree> > *,
2089
			       unsigned n = -1U);
2089
			       unsigned n = -1U);
2090
extern bool vect_slp_bb (basic_block);
2090
extern bool vect_slp_if_converted_bb (basic_block bb, loop_p orig_loop);
2091
extern bool vect_slp_function (function *);
2091
extern bool vect_slp_function (function *);
2092
extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
2092
extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
2093
extern stmt_vec_info vect_find_first_scalar_stmt_in_slp (slp_tree);
2093
extern stmt_vec_info vect_find_first_scalar_stmt_in_slp (slp_tree);
2094
- 

Return to bug 100089