View | Details | Return to bug 80846 | Differences between
and this patch

Collapse All | Expand All

(-)gcc/tree-vect-loop.c (-25 / +68 lines)
Lines 4899-4932 vect_create_epilog_for_reduction (vec<tr Link Here
4899
    }
4899
    }
4900
  else
4900
  else
4901
    {
4901
    {
4902
      bool reduce_with_shift = have_whole_vector_shift (mode);
4902
      bool reduce_with_shift;
4903
      int element_bitsize = tree_to_uhwi (bitsize);
4904
      int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4905
      tree vec_temp;
4903
      tree vec_temp;
4906
4904
 
4907
      /* Regardless of whether we have a whole vector shift, if we're
4905
      enum machine_mode mode1 = mode;
4908
         emulating the operation via tree-vect-generic, we don't want
4906
      tree vectype1 = vectype;
4909
         to use it.  Only the first round of the reduction is likely
4907
      unsigned sz = tree_to_uhwi (TYPE_SIZE_UNIT (vectype));
4910
         to still be profitable via emulation.  */
4908
      unsigned sz1 = sz;
4911
      /* ??? It might be better to emit a reduction tree code here, so that
4909
      if (!slp_reduc && targetm.vectorize.autovectorize_vector_sizes () != 0)
4912
         tree-vect-generic can expand the first round via bit tricks.  */
4910
        sz1 = ctz_hwi (targetm.vectorize.autovectorize_vector_sizes ());
4913
      if (!VECTOR_MODE_P (mode))
4911
      do
4914
        reduce_with_shift = false;
4912
	{
4915
      else
4913
	  vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz1);
4916
        {
4914
	  if (vectype1)
4917
          optab optab = optab_for_tree_code (code, vectype, optab_default);
4915
	    {
4918
          if (optab_handler (optab, mode) == CODE_FOR_nothing)
4916
	      mode1 = TYPE_MODE (vectype1);
4919
            reduce_with_shift = false;
4917
	      reduce_with_shift = have_whole_vector_shift (mode1);
4920
        }
4918
4919
	      /* Regardless of whether we have a whole vector shift, if we're
4920
		 emulating the operation via tree-vect-generic, we don't want
4921
		 to use it.  Only the first round of the reduction is likely
4922
		 to still be profitable via emulation.  */
4923
	      /* ??? It might be better to emit a reduction tree code here, so that
4924
		 tree-vect-generic can expand the first round via bit tricks.  */
4925
	      if (!VECTOR_MODE_P (mode1))
4926
		reduce_with_shift = false;
4927
	      else
4928
		{
4929
		  optab optab = optab_for_tree_code (code, vectype1, optab_default);
4930
		  if (optab_handler (optab, mode1) == CODE_FOR_nothing)
4931
		    reduce_with_shift = false;
4932
		}
4933
	    }
4934
4935
	  if (sz == sz1
4936
	      || reduce_with_shift)
4937
	    break;
4938
4939
	  sz1 *= 2;
4940
	}
4941
      while (1);
4921
4942
4922
      if (reduce_with_shift && !slp_reduc)
4943
      if (reduce_with_shift && !slp_reduc)
4923
        {
4944
        {
4924
          int nelements = vec_size_in_bits / element_bitsize;
4945
          new_temp = new_phi_result;
4946
	  while (sz > sz1)
4947
	    {
4948
	      sz /= 2;
4949
	      vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz);
4950
	      tree dst1 = make_ssa_name (vectype1);
4951
	      epilog_stmt = gimple_build_assign (dst1, BIT_FIELD_REF,
4952
				   build3 (BIT_FIELD_REF, vectype1,
4953
					   new_temp, TYPE_SIZE (vectype1),
4954
					   size_int (0)));
4955
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4956
	      tree dst2 =  make_ssa_name (vectype1);
4957
	      epilog_stmt = gimple_build_assign (dst2, BIT_FIELD_REF,
4958
				   build3 (BIT_FIELD_REF, vectype1,
4959
					   new_temp, TYPE_SIZE (vectype1),
4960
					   size_int (sz * BITS_PER_UNIT)));
4961
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4962
	      new_temp = make_ssa_name (vectype1);
4963
	      epilog_stmt = gimple_build_assign (new_temp, code, dst1, dst2);
4964
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4965
	    }
4966
          int nelements = TYPE_VECTOR_SUBPARTS (vectype1);
4925
          unsigned char *sel = XALLOCAVEC (unsigned char, nelements);
4967
          unsigned char *sel = XALLOCAVEC (unsigned char, nelements);
4926
4968
4927
          int elt_offset;
4969
          int elt_offset;
4928
4970
4929
          tree zero_vec = build_zero_cst (vectype);
4971
          tree zero_vec = build_zero_cst (vectype1);
4930
          /*** Case 2: Create:
4972
          /*** Case 2: Create:
4931
             for (offset = nelements/2; offset >= 1; offset/=2)
4973
             for (offset = nelements/2; offset >= 1; offset/=2)
4932
                {
4974
                {
Lines 4940-4953 vect_create_epilog_for_reduction (vec<tr Link Here
4940
            dump_printf_loc (MSG_NOTE, vect_location,
4982
            dump_printf_loc (MSG_NOTE, vect_location,
4941
			     "Reduce using vector shifts\n");
4983
			     "Reduce using vector shifts\n");
4942
4984
4943
          vec_dest = vect_create_destination_var (scalar_dest, vectype);
4985
	  mode1 = TYPE_MODE (vectype1);
4944
          new_temp = new_phi_result;
4986
          vec_dest = vect_create_destination_var (scalar_dest, vectype1);
4945
          for (elt_offset = nelements / 2;
4987
          for (elt_offset = nelements / 2;
4946
               elt_offset >= 1;
4988
               elt_offset >= 1;
4947
               elt_offset /= 2)
4989
               elt_offset /= 2)
4948
            {
4990
            {
4949
              calc_vec_perm_mask_for_shift (mode, elt_offset, sel);
4991
              calc_vec_perm_mask_for_shift (mode1, elt_offset, sel);
4950
              tree mask = vect_gen_perm_mask_any (vectype, sel);
4992
              tree mask = vect_gen_perm_mask_any (vectype1, sel);
4951
	      epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
4993
	      epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
4952
						 new_temp, zero_vec, mask);
4994
						 new_temp, zero_vec, mask);
4953
              new_name = make_ssa_name (vec_dest, epilog_stmt);
4995
              new_name = make_ssa_name (vec_dest, epilog_stmt);
Lines 4992-4998 vect_create_epilog_for_reduction (vec<tr Link Here
4992
            dump_printf_loc (MSG_NOTE, vect_location,
5034
            dump_printf_loc (MSG_NOTE, vect_location,
4993
			     "Reduce using scalar code.\n");
5035
			     "Reduce using scalar code.\n");
4994
5036
4995
          vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
5037
          int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
5038
	  int element_bitsize = tree_to_uhwi (bitsize);
4996
          FOR_EACH_VEC_ELT (new_phis, i, new_phi)
5039
          FOR_EACH_VEC_ELT (new_phis, i, new_phi)
4997
            {
5040
            {
4998
              int bit_offset;
5041
              int bit_offset;
(-)gcc/tree-vect-stmts.c (-2 / +2 lines)
Lines 6434-6440 vect_gen_perm_mask_any (tree vectype, co Link Here
6434
6434
6435
  mask_elt_type = lang_hooks.types.type_for_mode
6435
  mask_elt_type = lang_hooks.types.type_for_mode
6436
		    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6436
		    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6437
  mask_type = get_vectype_for_scalar_type (mask_elt_type);
6437
  mask_type = get_same_sized_vectype (mask_elt_type, vectype);
6438
6438
6439
  mask_elts = XALLOCAVEC (tree, nunits);
6439
  mask_elts = XALLOCAVEC (tree, nunits);
6440
  for (i = nunits - 1; i >= 0; i--)
6440
  for (i = nunits - 1; i >= 0; i--)
Lines 8985-8991 free_stmt_vec_info (gimple *stmt) Link Here
8985
   Returns the vector type corresponding to SCALAR_TYPE  and SIZE as supported
8985
   Returns the vector type corresponding to SCALAR_TYPE  and SIZE as supported
8986
   by the target.  */
8986
   by the target.  */
8987
8987
8988
static tree
8988
tree
8989
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
8989
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
8990
{
8990
{
8991
  tree orig_scalar_type = scalar_type;
8991
  tree orig_scalar_type = scalar_type;
(-)gcc/tree-vectorizer.h (+1 lines)
Lines 1062-1067 extern bool vect_can_advance_ivs_p (loop Link Here
1062
1062
1063
/* In tree-vect-stmts.c.  */
1063
/* In tree-vect-stmts.c.  */
1064
extern unsigned int current_vector_size;
1064
extern unsigned int current_vector_size;
1065
extern tree get_vectype_for_scalar_type_and_size (tree, unsigned);
1065
extern tree get_vectype_for_scalar_type (tree);
1066
extern tree get_vectype_for_scalar_type (tree);
1066
extern tree get_mask_type_for_scalar_type (tree);
1067
extern tree get_mask_type_for_scalar_type (tree);
1067
extern tree get_same_sized_vectype (tree, tree);
1068
extern tree get_same_sized_vectype (tree, tree);

Return to bug 80846