View | Details | Return to bug 96208
Collapse All | Expand All

(-)a/gcc/testsuite/gcc.dg/vect/vect-non-pow2-group.c (+25 lines)
Line 0 Link Here
1
/* { dg-do compile } */
2
/* { dg-require-effective-target vect_double } */
3
/* { dg-require-effective-target vect_perm } */
4
/* { dg-additional-options "-fdump-tree-vect-details -fno-vect-cost-model -Ofast" } */
5
6
/* Five doubles per array element.  foo reads only m1..m4, so each
   the_struct[u] access group spans five doubles while using four.  */
typedef struct {
    double m1, m2, m3, m4, m5;
} the_struct_t;

/* Only declared here; no definition is provided by this test.  */
double bar1 (the_struct_t*);

/* Accumulate (*k) * the_struct[u].mX into result.mX for X in 1..4 and
   u in [0, n), then return bar1 (&result).

   K is decremented after every iteration, so it is read backwards
   starting from the element it points to on entry.
   N is the number of elements of THE_STRUCT to process.  */
double foo (double* k, unsigned int n, the_struct_t* the_struct)
{
    unsigned int u;
    /* Start from zero: the members are only ever updated with "+=", so
       leaving the accumulator uninitialised would read indeterminate
       values.  */
    the_struct_t result = { 0 };
    for (u = 0; u < n; u++, k--) {
	result.m1 += (*k)*the_struct[u].m1;
	result.m2 += (*k)*the_struct[u].m2;
	result.m3 += (*k)*the_struct[u].m3;
	result.m4 += (*k)*the_struct[u].m4;
    }
    return bar1 (&result);
}
24
25
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */
(-)a/gcc/tree-vect-data-refs.c (-19 / +193 lines)
Lines 5027-5032 vect_create_destination_var (tree scalar_dest, tree vectype) Link Here
5027
  return vec_dest;
5027
  return vec_dest;
5028
}
5028
}
5029
5029
5030
/* Function vect_all_2element_permutations_supported
5031
5032
   Returns TRUE if all possible permutations for 2-element
5033
   vectors are supported for requested mode.  */
5034
5035
bool
5036
vect_all_2element_permutations_supported (machine_mode mode)
5037
{
5038
  // check all possible permutations for 2-element vectors
5039
  // for 2 vectors it'll be all low and high combinations:
5040
  // ll={0, 2}, lh={0, 3}, hl={1,2}, hh={1,3}
5041
  poly_uint64 nelt = GET_MODE_NUNITS (mode);
5042
  if (!known_eq (nelt, 2ULL))
5043
    return false;
5044
  vec_perm_builder sel (nelt, 2, 2);
5045
  sel.quick_grow (2);
5046
  sel[0] = 0;
5047
  sel[1] = 2;
5048
  vec_perm_indices ll (sel, 2, 2);
5049
  sel[1] = 3;
5050
  vec_perm_indices lh (sel, 2, 2);
5051
  sel[0] = 1;
5052
  vec_perm_indices hh (sel, 2, 2);
5053
  sel[1] = 2;
5054
  vec_perm_indices hl (sel, 2, 2);
5055
  return can_vec_perm_const_p (mode, ll)
5056
      && can_vec_perm_const_p (mode, lh)
5057
      && can_vec_perm_const_p (mode, hl)
5058
      && can_vec_perm_const_p (mode, hh);
5059
}
5060
5030
/* Function vect_grouped_store_supported.
5061
/* Function vect_grouped_store_supported.
5031
5062
5032
   Returns TRUE if interleave high and interleave low permutations
5063
   Returns TRUE if interleave high and interleave low permutations
Lines 5038-5050 vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) Link Here
5038
  machine_mode mode = TYPE_MODE (vectype);
5069
  machine_mode mode = TYPE_MODE (vectype);
5039
5070
5040
  /* vect_permute_store_chain requires the group size to be equal to 3 or
5071
  /* vect_permute_store_chain requires the group size to be equal to 3 or
5041
     be a power of two.  */
5072
     be a power of two or 2-element vectors to be used.  */
5042
  if (count != 3 && exact_log2 (count) == -1)
5073
  if (count != 3 && exact_log2 (count) == -1
5074
       && !known_eq (GET_MODE_NUNITS (mode), 2ULL))
5043
    {
5075
    {
5044
      if (dump_enabled_p ())
5076
      if (dump_enabled_p ())
5045
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5077
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5046
			 "the size of the group of accesses"
5078
			 "the size of the group of accesses"
5047
			 " is not a power of 2 or not eqaul to 3\n");
5079
			 " is not a power of 2 or not eqaul to 3"
5080
			 " and vector element number is not 2\n");
5048
      return false;
5081
      return false;
5049
    }
5082
    }
5050
5083
Lines 5113-5121 vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) Link Here
5113
	    }
5146
	    }
5114
	  return true;
5147
	  return true;
5115
	}
5148
	}
5149
      else if (known_eq (GET_MODE_NUNITS (mode), 2ULL))
5150
	{
5151
	  return vect_all_2element_permutations_supported (mode);
5152
	}
5116
      else
5153
      else
5117
	{
5154
	{
5118
	  /* If length is not equal to 3 then only power of 2 is supported.  */
5155
	 /* If length is not equal to 3 and vectors are not 2-elements
5156
	    then only power of 2 is supported.  */
5119
	  gcc_assert (pow2p_hwi (count));
5157
	  gcc_assert (pow2p_hwi (count));
5120
	  poly_uint64 nelt = GET_MODE_NUNITS (mode);
5158
	  poly_uint64 nelt = GET_MODE_NUNITS (mode);
5121
5159
Lines 5239-5244 vect_permute_store_chain (vec_info *vinfo, vec<tree> dr_chain, Link Here
5239
  tree data_ref;
5277
  tree data_ref;
5240
  tree perm3_mask_low, perm3_mask_high;
5278
  tree perm3_mask_low, perm3_mask_high;
5241
  unsigned int i, j, n, log_length = exact_log2 (length);
5279
  unsigned int i, j, n, log_length = exact_log2 (length);
5280
  poly_uint64 nelt_poly = TYPE_VECTOR_SUBPARTS (vectype);
5281
  unsigned int unelt;
5242
5282
5243
  result_chain->quick_grow (length);
5283
  result_chain->quick_grow (length);
5244
  memcpy (result_chain->address (), dr_chain.address (),
5284
  memcpy (result_chain->address (), dr_chain.address (),
Lines 5247-5253 vect_permute_store_chain (vec_info *vinfo, vec<tree> dr_chain, Link Here
5247
  if (length == 3)
5287
  if (length == 3)
5248
    {
5288
    {
5249
      /* vect_grouped_store_supported ensures that this is constant.  */
5289
      /* vect_grouped_store_supported ensures that this is constant.  */
5250
      unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
5290
      unsigned int nelt = nelt_poly.to_constant ();
5251
      unsigned int j0 = 0, j1 = 0, j2 = 0;
5291
      unsigned int j0 = 0, j1 = 0, j2 = 0;
5252
5292
5253
      vec_perm_builder sel (nelt, nelt, 1);
5293
      vec_perm_builder sel (nelt, nelt, 1);
Lines 5308-5320 vect_permute_store_chain (vec_info *vinfo, vec<tree> dr_chain, Link Here
5308
	  (*result_chain)[j] = data_ref;
5348
	  (*result_chain)[j] = data_ref;
5309
	}
5349
	}
5310
    }
5350
    }
5311
  else
5351
  else if (pow2p_hwi (length))
5312
    {
5352
    {
5313
      /* If length is not equal to 3 then only power of 2 is supported.  */
5314
      gcc_assert (pow2p_hwi (length));
5315
5316
      /* The encoding has 2 interleaved stepped patterns.  */
5353
      /* The encoding has 2 interleaved stepped patterns.  */
5317
      poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
5354
      poly_uint64 nelt = nelt_poly;
5318
      vec_perm_builder sel (nelt, 2, 3);
5355
      vec_perm_builder sel (nelt, 2, 3);
5319
      sel.quick_grow (6);
5356
      sel.quick_grow (6);
5320
      for (i = 0; i < 3; i++)
5357
      for (i = 0; i < 3; i++)
Lines 5360-5365 vect_permute_store_chain (vec_info *vinfo, vec<tree> dr_chain, Link Here
5360
		    length * sizeof (tree));
5397
		    length * sizeof (tree));
5361
	  }
5398
	  }
5362
    }
5399
    }
5400
  else
5401
    {
5402
      gcc_assert (known_eq (nelt_poly, 2ULL));
5403
      unelt = nelt_poly.to_constant ();
5404
      vec_perm_builder sel (unelt, unelt, 1);
5405
      sel.quick_grow (unelt);
5406
      vec_perm_indices indices;
5407
5408
      // perm_x_y == x-th element of 1st vector and y-th element of 2nd vector
5409
      tree perm_0_0, perm_0_1, perm_1_1;
5410
      sel[0] = 0;
5411
      sel[1] = 1 + unelt;
5412
      indices.new_vector (sel, 2, unelt);
5413
      perm_0_1 = vect_gen_perm_mask_checked (vectype, indices);
5414
5415
      sel[0] = 0;
5416
      sel[1] = 0 + unelt;
5417
      indices.new_vector (sel, 2, unelt);
5418
      perm_0_0 = vect_gen_perm_mask_checked (vectype, indices);
5419
5420
      sel[0] = 1;
5421
      sel[1] = 1 + unelt;
5422
      indices.new_vector (sel, 2, unelt);
5423
      perm_1_1 = vect_gen_perm_mask_checked (vectype, indices);
5424
5425
      if (length & 1)
5426
	{
5427
	  int idx1, idx2;
5428
	  tree currentPerm;
5429
	  for (j = 0; j < length; j++)
5430
	    {
5431
	      idx1 = (2 * j) % length;
5432
	      idx2 = (2 * j + 1) % length;
5433
	      vect1 = dr_chain[idx1];
5434
	      vect2 = dr_chain[idx2];
5435
	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm");
5436
	      if (j < length / 2) currentPerm = perm_0_0;
5437
	      else if (j == length / 2) currentPerm = perm_0_1;
5438
	      else currentPerm = perm_1_1;
5439
	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
5440
					       vect2, currentPerm);
5441
	      vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
5442
	      (*result_chain)[j] = data_ref;
5443
	    }
5444
	}
5445
      else
5446
	{
5447
	  for (i = 0; i < unelt; i++)
5448
	    {
5449
	      for (j = 0; j < length / 2; j++)
5450
		{
5451
		  vect1 = dr_chain[2 * j];
5452
		  vect2 = dr_chain[2 * j + 1];
5453
		  data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm");
5454
		  tree selectedPerm = (i == 0) ? perm_0_0 : perm_1_1;
5455
		  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
5456
						   vect1, vect2, selectedPerm);
5457
		  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
5458
					       gsi);
5459
		  (*result_chain)[i * length / 2 + j] = data_ref;
5460
		}
5461
	    }
5462
	}
5463
    }
5363
}
5464
}
5364
5465
5365
/* Function vect_setup_realignment
5466
/* Function vect_setup_realignment
Lines 5664-5675 vect_grouped_load_supported (tree vectype, bool single_element_p, Link Here
5664
5765
5665
  /* vect_permute_load_chain requires the group size to be equal to 3 or
5766
  /* vect_permute_load_chain requires the group size to be equal to 3 or
5666
     be a power of two.  */
5767
     be a power of two.  */
5667
  if (count != 3 && exact_log2 (count) == -1)
5768
  poly_uint64 poly_unelt = GET_MODE_NUNITS (mode);
5769
  if (count != 3 && exact_log2 (count) == -1
5770
      && !known_eq (poly_unelt, 2ULL))
5668
    {
5771
    {
5669
      if (dump_enabled_p ())
5772
      if (dump_enabled_p ())
5670
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5773
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5671
			 "the size of the group of accesses"
5774
			 "the size of the group of accesses"
5672
			 " is not a power of 2 or not equal to 3\n");
5775
			 " is not a power of 2 or not equal to 3"
5776
			 " and vector elements number is not 2\n");
5673
      return false;
5777
      return false;
5674
    }
5778
    }
5675
5779
Lines 5726-5734 vect_grouped_load_supported (tree vectype, bool single_element_p, Link Here
5726
	    }
5830
	    }
5727
	  return true;
5831
	  return true;
5728
	}
5832
	}
5833
      else if (known_eq (poly_unelt, 2ULL))
5834
	{
5835
	  return vect_all_2element_permutations_supported (mode);
5836
	}
5729
      else
5837
      else
5730
	{
5838
	{
5731
	  /* If length is not equal to 3 then only power of 2 is supported.  */
5839
	  /* If length is not equal to 3 and vector size is not 2
5840
	  then only power of 2 is supported.  */
5732
	  gcc_assert (pow2p_hwi (count));
5841
	  gcc_assert (pow2p_hwi (count));
5733
	  poly_uint64 nelt = GET_MODE_NUNITS (mode);
5842
	  poly_uint64 nelt = GET_MODE_NUNITS (mode);
5734
5843
Lines 5862-5867 vect_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain, Link Here
5862
  gimple *perm_stmt;
5971
  gimple *perm_stmt;
5863
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5972
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5864
  unsigned int i, j, log_length = exact_log2 (length);
5973
  unsigned int i, j, log_length = exact_log2 (length);
5974
  poly_uint64 nelt_poly = TYPE_VECTOR_SUBPARTS (vectype);
5865
5975
5866
  result_chain->quick_grow (length);
5976
  result_chain->quick_grow (length);
5867
  memcpy (result_chain->address (), dr_chain.address (),
5977
  memcpy (result_chain->address (), dr_chain.address (),
Lines 5870-5876 vect_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain, Link Here
5870
  if (length == 3)
5980
  if (length == 3)
5871
    {
5981
    {
5872
      /* vect_grouped_load_supported ensures that this is constant.  */
5982
      /* vect_grouped_load_supported ensures that this is constant.  */
5873
      unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
5983
      unsigned nelt = nelt_poly.to_constant ();
5874
      unsigned int k;
5984
      unsigned int k;
5875
5985
5876
      vec_perm_builder sel (nelt, nelt, 1);
5986
      vec_perm_builder sel (nelt, nelt, 1);
Lines 5917-5929 vect_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain, Link Here
5917
	  (*result_chain)[k] = data_ref;
6027
	  (*result_chain)[k] = data_ref;
5918
	}
6028
	}
5919
    }
6029
    }
5920
  else
6030
  else if (pow2p_hwi (length))
5921
    {
6031
    {
5922
      /* If length is not equal to 3 then only power of 2 is supported.  */
5923
      gcc_assert (pow2p_hwi (length));
5924
5925
      /* The encoding has a single stepped pattern.  */
6032
      /* The encoding has a single stepped pattern.  */
5926
      poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
6033
      poly_uint64 nelt = nelt_poly;
5927
      vec_perm_builder sel (nelt, 1, 3);
6034
      vec_perm_builder sel (nelt, 1, 3);
5928
      sel.quick_grow (3);
6035
      sel.quick_grow (3);
5929
      for (i = 0; i < 3; ++i)
6036
      for (i = 0; i < 3; ++i)
Lines 5963-5968 vect_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain, Link Here
5963
		  length * sizeof (tree));
6070
		  length * sizeof (tree));
5964
	}
6071
	}
5965
    }
6072
    }
6073
  else
6074
    {
6075
      gcc_assert (known_eq (nelt_poly, 2ULL));
6076
      unsigned int unelt = nelt_poly.to_constant ();
6077
      vec_perm_builder sel (unelt, unelt, 1);
6078
      sel.quick_grow (unelt);
6079
      vec_perm_indices indices;
6080
6081
      // for 2-element vectors there are 2 cases to consider:
6082
      // 1) result vector Vi is {Vn[j], Vm[k]} and j == k
6083
      //    which means length is even.
6084
      // 2) result vector Vi is {Vn[j], Vm[k]} and j != k
6085
      //    which means length is odd.
6086
6087
      // perm_x_y == x-th element of 1st vector and y-th element of 2nd vector
6088
      tree perm_0_0, perm_0_1, perm_1_0, perm_1_1;
6089
      sel[0] = 0;
6090
      sel[1] = 1 + unelt;
6091
      indices.new_vector (sel, 2, unelt);
6092
      perm_0_1 = vect_gen_perm_mask_checked (vectype, indices);
6093
6094
      sel[0] = 1;
6095
      sel[1] = 0 + unelt;
6096
      indices.new_vector (sel, 2, unelt);
6097
      perm_1_0 = vect_gen_perm_mask_checked (vectype, indices);
6098
6099
      sel[0] = 0;
6100
      sel[1] = 0 + unelt;
6101
      indices.new_vector (sel, 2, unelt);
6102
      perm_0_0 = vect_gen_perm_mask_checked (vectype, indices);
6103
6104
      sel[0] = 1;
6105
      sel[1] = 1 + unelt;
6106
      indices.new_vector (sel, 2, unelt);
6107
      perm_1_1 = vect_gen_perm_mask_checked (vectype, indices);
6108
6109
      for (i = 0; i < length; i++)
6110
	{
6111
	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm");
6112
6113
	  first_vect = dr_chain[i / unelt];
6114
	  second_vect = dr_chain[(i + length) / unelt];
6115
6116
	  tree selectedPerm;
6117
	  if (length & 1)
6118
	    { // odd length. different indices
6119
	      // odd i, first vector index is 1 and second vector index is 0
6120
	      // even i, first vector index is 0 and second vector index is 1
6121
	      selectedPerm = (i & 1) ? perm_1_0 : perm_0_1;
6122
	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6123
					       first_vect, second_vect,
6124
					       selectedPerm);
6125
	      vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6126
	      (*result_chain)[i] = data_ref;
6127
	    }
6128
	  else
6129
	    { // even length. same indices (both 0 or both 1)
6130
	      selectedPerm = (i & 1) ? perm_1_1 : perm_0_0;
6131
	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6132
					       first_vect, second_vect,
6133
					       selectedPerm);
6134
	      vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6135
	      (*result_chain)[i] = data_ref;
6136
	    }
6137
	}
6138
    }
5966
}
6139
}
5967
6140
5968
/* Function vect_shift_permute_load_chain.
6141
/* Function vect_shift_permute_load_chain.
Lines 6340-6345 vect_transform_grouped_load (vec_info *vinfo, stmt_vec_info stmt_info, Link Here
6340
  mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info));
6513
  mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info));
6341
  if (targetm.sched.reassociation_width (VEC_PERM_EXPR, mode) > 1
6514
  if (targetm.sched.reassociation_width (VEC_PERM_EXPR, mode) > 1
6342
      || pow2p_hwi (size)
6515
      || pow2p_hwi (size)
6516
      || known_eq (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)), 2ULL)
6343
      || !vect_shift_permute_load_chain (vinfo, dr_chain, size, stmt_info,
6517
      || !vect_shift_permute_load_chain (vinfo, dr_chain, size, stmt_info,
6344
					 gsi, &result_chain))
6518
					 gsi, &result_chain))
6345
    vect_permute_load_chain (vinfo, dr_chain,
6519
    vect_permute_load_chain (vinfo, dr_chain,

Return to bug 96208