
[PATCH] Vectorize strided group loads


Currently the vectorizer forces unrolling for grouped loads whose DR_STEP is
not constant, so that the elements can be loaded via the strided-load support.
The following patch enhances that machinery to handle SLP-used groups with
non-constant DR_STEP, avoiding the excessive unrolling (and (un-)packing).
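
For reference, this is the kind of loop the patch now vectorizes with SLP
instead of unrolling (a minimal illustration adapted from the new slp-41.c
testcase below; the four loads from p form one group whose step, i*stride,
is not a compile-time constant):

  void
  foo (int *q, int *p, int stride, int n)
  {
    int i;
    for (i = 0; i < n; ++i)
      {
        /* Grouped load with non-constant step: group size 4, step i*stride.  */
        q[i*4+0] = p[i*stride+0];
        q[i*4+1] = p[i*stride+1];
        q[i*4+2] = p[i*stride+2];
        q[i*4+3] = p[i*stride+3];
      }
  }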

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2015-05-08  Richard Biener  <rguenther@suse.de>

	* tree-vect-data-refs.c (vect_compute_data_ref_alignment):
	Handle strided group loads.
	(vect_verify_datarefs_alignment): Likewise.
	(vect_enhance_data_refs_alignment): Likewise.
	(vect_analyze_group_access): Likewise.
	(vect_analyze_data_ref_access): Likewise.
	(vect_analyze_data_ref_accesses): Likewise.
	* tree-vect-stmts.c (vect_model_load_cost): Likewise.
	(vectorizable_load): Likewise.

	* gcc.dg/vect/slp-41.c: New testcase.

Index: gcc/tree-vect-data-refs.c
===================================================================
*** gcc/tree-vect-data-refs.c.orig	2015-05-08 13:24:31.797746925 +0200
--- gcc/tree-vect-data-refs.c	2015-05-08 13:26:23.839725349 +0200
*************** vect_compute_data_ref_alignment (struct
*** 671,677 ****
    tree vectype;
    tree base, base_addr;
    bool base_aligned;
!   tree misalign;
    tree aligned_to;
    unsigned HOST_WIDE_INT alignment;
  
--- 671,677 ----
    tree vectype;
    tree base, base_addr;
    bool base_aligned;
!   tree misalign = NULL_TREE;
    tree aligned_to;
    unsigned HOST_WIDE_INT alignment;
  
*************** vect_compute_data_ref_alignment (struct
*** 687,696 ****
  
    /* Strided loads perform only component accesses, misalignment information
       is irrelevant for them.  */
!   if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
      return true;
  
!   misalign = DR_INIT (dr);
    aligned_to = DR_ALIGNED_TO (dr);
    base_addr = DR_BASE_ADDRESS (dr);
    vectype = STMT_VINFO_VECTYPE (stmt_info);
--- 687,698 ----
  
    /* Strided loads perform only component accesses, misalignment information
       is irrelevant for them.  */
!   if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
!       && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
      return true;
  
!   if (tree_fits_shwi_p (DR_STEP (dr)))
!     misalign = DR_INIT (dr);
    aligned_to = DR_ALIGNED_TO (dr);
    base_addr = DR_BASE_ADDRESS (dr);
    vectype = STMT_VINFO_VECTYPE (stmt_info);
*************** vect_compute_data_ref_alignment (struct
*** 704,712 ****
    if (loop && nested_in_vect_loop_p (loop, stmt))
      {
        tree step = DR_STEP (dr);
-       HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
  
!       if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
          {
            if (dump_enabled_p ())
              dump_printf_loc (MSG_NOTE, vect_location,
--- 706,714 ----
    if (loop && nested_in_vect_loop_p (loop, stmt))
      {
        tree step = DR_STEP (dr);
  
!       if (tree_fits_shwi_p (step)
! 	  && tree_to_shwi (step) % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
          {
            if (dump_enabled_p ())
              dump_printf_loc (MSG_NOTE, vect_location,
*************** vect_compute_data_ref_alignment (struct
*** 732,740 ****
    if (!loop)
      {
        tree step = DR_STEP (dr);
-       HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
  
!       if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
  	{
  	  if (dump_enabled_p ())
  	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
--- 734,742 ----
    if (!loop)
      {
        tree step = DR_STEP (dr);
  
!       if (tree_fits_shwi_p (step)
! 	  && tree_to_shwi (step) % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
  	{
  	  if (dump_enabled_p ())
  	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
*************** vect_verify_datarefs_alignment (loop_vec
*** 964,970 ****
  
        /* Strided loads perform only component accesses, alignment is
  	 irrelevant for them.  */
!       if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
  	continue;
  
        supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
--- 966,973 ----
  
        /* Strided loads perform only component accesses, alignment is
  	 irrelevant for them.  */
!       if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
! 	  && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
  	continue;
  
        supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
*************** vect_enhance_data_refs_alignment (loop_v
*** 1431,1437 ****
  
        /* Strided loads perform only component accesses, alignment is
  	 irrelevant for them.  */
!       if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
  	continue;
  
        supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
--- 1434,1441 ----
  
        /* Strided loads perform only component accesses, alignment is
  	 irrelevant for them.  */
!       if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
! 	  && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
  	continue;
  
        supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
*************** vect_enhance_data_refs_alignment (loop_v
*** 1723,1729 ****
  
  	  /* Strided loads perform only component accesses, alignment is
  	     irrelevant for them.  */
! 	  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
  	    continue;
  
  	  save_misalignment = DR_MISALIGNMENT (dr);
--- 1727,1734 ----
  
  	  /* Strided loads perform only component accesses, alignment is
  	     irrelevant for them.  */
! 	  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
! 	      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
  	    continue;
  
  	  save_misalignment = DR_MISALIGNMENT (dr);
*************** vect_enhance_data_refs_alignment (loop_v
*** 1841,1850 ****
  		  && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
  	    continue;
  
- 	  /* Strided loads perform only component accesses, alignment is
- 	     irrelevant for them.  */
  	  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
! 	    continue;
  
  	  supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
  
--- 1846,1860 ----
  		  && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
  	    continue;
  
  	  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
! 	    {
! 	      /* Strided loads perform only component accesses, alignment is
! 		 irrelevant for them.  */
! 	      if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
! 		continue;
! 	      do_versioning = false;
! 	      break;
! 	    }
  
  	  supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
  
*************** vect_analyze_group_access (struct data_r
*** 2057,2063 ****
    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
    bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
!   HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
    HOST_WIDE_INT groupsize, last_accessed_element = 1;
    bool slp_impossible = false;
    struct loop *loop = NULL;
--- 2067,2073 ----
    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
    bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
!   HOST_WIDE_INT dr_step = -1;
    HOST_WIDE_INT groupsize, last_accessed_element = 1;
    bool slp_impossible = false;
    struct loop *loop = NULL;
*************** vect_analyze_group_access (struct data_r
*** 2067,2073 ****
  
    /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
       size of the interleaving group (including gaps).  */
!   groupsize = absu_hwi (dr_step) / type_size;
  
    /* Not consecutive access is possible only if it is a part of interleaving.  */
    if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
--- 2077,2089 ----
  
    /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
       size of the interleaving group (including gaps).  */
!   if (tree_fits_shwi_p (step))
!     {
!       dr_step = tree_to_shwi (step);
!       groupsize = absu_hwi (dr_step) / type_size;
!     }
!   else
!     groupsize = 0;
  
    /* Not consecutive access is possible only if it is a part of interleaving.  */
    if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
*************** vect_analyze_group_access (struct data_r
*** 2142,2148 ****
        tree prev_init = DR_INIT (data_ref);
        gimple prev = stmt;
        HOST_WIDE_INT diff, gaps = 0;
-       unsigned HOST_WIDE_INT count_in_bytes;
  
        while (next)
          {
--- 2158,2163 ----
*************** vect_analyze_group_access (struct data_r
*** 2211,2240 ****
            count++;
          }
  
!       /* COUNT is the number of accesses found, we multiply it by the size of
!          the type to get COUNT_IN_BYTES.  */
!       count_in_bytes = type_size * count;
! 
!       /* Check that the size of the interleaving (including gaps) is not
!          greater than STEP.  */
!       if (dr_step != 0
! 	  && absu_hwi (dr_step) < count_in_bytes + gaps * type_size)
!         {
!           if (dump_enabled_p ())
!             {
!               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
!                                "interleaving size is greater than step for ");
!               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
!                                  DR_REF (dr));
!               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
!             }
!           return false;
!         }
  
!       /* Check that the size of the interleaving is equal to STEP for stores,
           i.e., that there are no gaps.  */
!       if (dr_step != 0
! 	  && absu_hwi (dr_step) != count_in_bytes)
          {
            if (DR_IS_READ (dr))
              {
--- 2226,2237 ----
            count++;
          }
  
!       if (groupsize == 0)
!         groupsize = count + gaps;
  
!       /* Check that the size of the interleaving is equal to count for stores,
           i.e., that there are no gaps.  */
!       if (groupsize != count)
          {
            if (DR_IS_READ (dr))
              {
*************** vect_analyze_group_access (struct data_r
*** 2253,2278 ****
              }
          }
  
-       /* Check that STEP is a multiple of type size.  */
-       if (dr_step != 0
- 	  && (dr_step % type_size) != 0)
-         {
-           if (dump_enabled_p ())
-             {
-               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                "step is not a multiple of type size: step ");
-               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, step);
-               dump_printf (MSG_MISSED_OPTIMIZATION, " size ");
-               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
-                                  TYPE_SIZE_UNIT (scalar_type));
-               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
-             }
-           return false;
-         }
- 
-       if (groupsize == 0)
-         groupsize = count + gaps;
- 
        GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
--- 2250,2255 ----
*************** vect_analyze_data_ref_access (struct dat
*** 2392,2400 ****
        return false;
      }
  
    /* Assume this is a DR handled by non-constant strided load case.  */
    if (TREE_CODE (step) != INTEGER_CST)
!     return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
  
    /* Not consecutive access - check if it's a part of interleaving group.  */
    return vect_analyze_group_access (dr);
--- 2369,2380 ----
        return false;
      }
  
+ 
    /* Assume this is a DR handled by non-constant strided load case.  */
    if (TREE_CODE (step) != INTEGER_CST)
!     return (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
! 	    && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
! 		|| vect_analyze_group_access (dr)));
  
    /* Not consecutive access - check if it's a part of interleaving group.  */
    return vect_analyze_group_access (dr);
*************** vect_analyze_data_ref_accesses (loop_vec
*** 2596,2610 ****
  	      || !gimple_assign_single_p (DR_STMT (drb)))
  	    break;
  
! 	  /* Check that the data-refs have the same constant size and step.  */
  	  tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
  	  tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
  	  if (!tree_fits_uhwi_p (sza)
  	      || !tree_fits_uhwi_p (szb)
! 	      || !tree_int_cst_equal (sza, szb)
! 	      || !tree_fits_shwi_p (DR_STEP (dra))
! 	      || !tree_fits_shwi_p (DR_STEP (drb))
! 	      || !tree_int_cst_equal (DR_STEP (dra), DR_STEP (drb)))
  	    break;
  
  	  /* Do not place the same access in the interleaving chain twice.  */
--- 2576,2591 ----
  	      || !gimple_assign_single_p (DR_STMT (drb)))
  	    break;
  
! 	  /* Check that the data-refs have the same constant size.  */
  	  tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
  	  tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
  	  if (!tree_fits_uhwi_p (sza)
  	      || !tree_fits_uhwi_p (szb)
! 	      || !tree_int_cst_equal (sza, szb))
! 	    break;
! 
! 	  /* Check that the data-refs have the same step.  */
! 	  if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
  	    break;
  
  	  /* Do not place the same access in the interleaving chain twice.  */
*************** vect_analyze_data_ref_accesses (loop_vec
*** 2637,2647 ****
  		  != type_size_a))
  	    break;
  
! 	  /* The step (if not zero) is greater than the difference between
! 	     data-refs' inits.  This splits groups into suitable sizes.  */
! 	  HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
! 	  if (step != 0 && step <= (init_b - init_a))
! 	    break;
  
  	  if (dump_enabled_p ())
  	    {
--- 2618,2632 ----
  		  != type_size_a))
  	    break;
  
! 	  /* If the step (if not zero or non-constant) is greater than the
! 	     difference between data-refs' inits this splits groups into
! 	     suitable sizes.  */
! 	  if (tree_fits_shwi_p (DR_STEP (dra)))
! 	    {
! 	      HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
! 	      if (step != 0 && step <= (init_b - init_a))
! 		break;
! 	    }
  
  	  if (dump_enabled_p ())
  	    {
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c.orig	2015-05-08 13:24:31.797746925 +0200
--- gcc/tree-vect-stmts.c	2015-05-08 13:28:00.920573458 +0200
*************** vect_model_load_cost (stmt_vec_info stmt
*** 1112,1118 ****
       equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
       access is instead being provided by a load-and-permute operation,
       include the cost of the permutes.  */
!   if (!load_lanes_p && group_size > 1)
      {
        /* Uses an even and odd extract operations or shuffle operations
  	 for each needed permute.  */
--- 1112,1119 ----
       equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
       access is instead being provided by a load-and-permute operation,
       include the cost of the permutes.  */
!   if (!load_lanes_p && group_size > 1
!       && !STMT_VINFO_STRIDE_LOAD_P (stmt_info))
      {
        /* Uses an even and odd extract operations or shuffle operations
  	 for each needed permute.  */
*************** vect_model_load_cost (stmt_vec_info stmt
*** 1127,1141 ****
      }
  
    /* The loads themselves.  */
!   if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
      {
        /* N scalar loads plus gathering them into a vector.  */
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);
        inside_cost += record_stmt_cost (body_cost_vec,
  				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
  				       scalar_load, stmt_info, 0, vect_body);
-       inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
- 				       stmt_info, 0, vect_body);
      }
    else
      vect_get_load_cost (first_dr, ncopies,
--- 1128,1141 ----
      }
  
    /* The loads themselves.  */
!   if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
!       && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
      {
        /* N scalar loads plus gathering them into a vector.  */
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);
        inside_cost += record_stmt_cost (body_cost_vec,
  				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
  				       scalar_load, stmt_info, 0, vect_body);
      }
    else
      vect_get_load_cost (first_dr, ncopies,
*************** vect_model_load_cost (stmt_vec_info stmt
*** 1143,1148 ****
--- 1143,1151 ----
  			 || group_size > 1 || slp_node),
  			&inside_cost, &prologue_cost, 
  			prologue_cost_vec, body_cost_vec, true);
+   if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+       inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
+ 				       stmt_info, 0, vect_body);
  
    if (dump_enabled_p ())
      dump_printf_loc (MSG_NOTE, vect_location,
*************** vectorizable_load (gimple stmt, gimple_s
*** 5657,5663 ****
    gimple ptr_incr = NULL;
    int nunits = TYPE_VECTOR_SUBPARTS (vectype);
    int ncopies;
!   int i, j, group_size, group_gap;
    tree msq = NULL_TREE, lsq;
    tree offset = NULL_TREE;
    tree byte_offset = NULL_TREE;
--- 5660,5666 ----
    gimple ptr_incr = NULL;
    int nunits = TYPE_VECTOR_SUBPARTS (vectype);
    int ncopies;
!   int i, j, group_size = -1, group_gap;
    tree msq = NULL_TREE, lsq;
    tree offset = NULL_TREE;
    tree byte_offset = NULL_TREE;
*************** vectorizable_load (gimple stmt, gimple_s
*** 5790,5798 ****
  	  return false;
  	}
  
!       if (!slp && !PURE_SLP_STMT (stmt_info))
  	{
- 	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
  	  if (vect_load_lanes_supported (vectype, group_size))
  	    load_lanes_p = true;
  	  else if (!vect_grouped_load_supported (vectype, group_size))
--- 5793,5803 ----
  	  return false;
  	}
  
!       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
!       if (!slp
! 	  && !PURE_SLP_STMT (stmt_info)
! 	  && !STMT_VINFO_STRIDE_LOAD_P (stmt_info))
  	{
  	  if (vect_load_lanes_supported (vectype, group_size))
  	    load_lanes_p = true;
  	  else if (!vect_grouped_load_supported (vectype, group_size))
*************** vectorizable_load (gimple stmt, gimple_s
*** 5847,5853 ****
  	}
      }
    else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
!     ;
    else
      {
        negative = tree_int_cst_compare (nested_in_vect_loop
--- 5852,5873 ----
  	}
      }
    else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
!     {
!       if ((grouped_load
! 	   && (slp || PURE_SLP_STMT (stmt_info)))
! 	  && (group_size > nunits
! 	      || nunits % group_size != 0
! 	      /* ???  During analysis phase we are not called with the
! 	         slp node/instance we are in so whether we'll end up
! 		 with a permutation we don't know.  Still we don't
! 		 support load permutations.  */
! 	      || slp_perm))
! 	{
! 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
! 			   "unhandled strided group load\n");
! 	  return false;
! 	}
!     }
    else
      {
        negative = tree_int_cst_compare (nested_in_vect_loop
*************** vectorizable_load (gimple stmt, gimple_s
*** 6136,6169 ****
        prev_stmt_info = NULL;
        running_off = offvar;
        alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
        for (j = 0; j < ncopies; j++)
  	{
  	  tree vec_inv;
  
! 	  vec_alloc (v, nunits);
! 	  for (i = 0; i < nunits; i++)
  	    {
! 	      tree newref, newoff;
! 	      gimple incr;
! 	      newref = build2 (MEM_REF, TREE_TYPE (vectype),
! 			       running_off, alias_off);
! 
! 	      newref = force_gimple_operand_gsi (gsi, newref, true,
! 						 NULL_TREE, true,
! 						 GSI_SAME_STMT);
! 	      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
! 	      newoff = copy_ssa_name (running_off);
! 	      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
  					  running_off, stride_step);
  	      vect_finish_stmt_generation (stmt, incr, gsi);
  
  	      running_off = newoff;
  	    }
  
! 	  vec_inv = build_constructor (vectype, v);
! 	  new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
! 	  new_stmt = SSA_NAME_DEF_STMT (new_temp);
! 
  	  if (j == 0)
  	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
  	  else
--- 6156,6220 ----
        prev_stmt_info = NULL;
        running_off = offvar;
        alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
+       int nloads = nunits;
+       tree ltype = TREE_TYPE (vectype);
+       if (slp)
+ 	{
+ 	  nloads = nunits / group_size;
+ 	  if (group_size < nunits)
+ 	    ltype = build_vector_type (TREE_TYPE (vectype), group_size);
+ 	  else
+ 	    ltype = vectype;
+ 	  ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
+ 	  ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ 	  gcc_assert (!slp_perm);
+ 	}
        for (j = 0; j < ncopies; j++)
  	{
  	  tree vec_inv;
  
! 	  if (nloads > 1)
  	    {
! 	      vec_alloc (v, nloads);
! 	      for (i = 0; i < nloads; i++)
! 		{
! 		  tree newref, newoff;
! 		  gimple incr;
! 		  newref = build2 (MEM_REF, ltype, running_off, alias_off);
! 
! 		  newref = force_gimple_operand_gsi (gsi, newref, true,
! 						     NULL_TREE, true,
! 						     GSI_SAME_STMT);
! 		  CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
! 		  newoff = copy_ssa_name (running_off);
! 		  incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
! 					      running_off, stride_step);
! 		  vect_finish_stmt_generation (stmt, incr, gsi);
! 
! 		  running_off = newoff;
! 		}
! 
! 	      vec_inv = build_constructor (vectype, v);
! 	      new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
! 	      new_stmt = SSA_NAME_DEF_STMT (new_temp);
! 	    }
! 	  else
! 	    {
! 	      new_stmt = gimple_build_assign (make_ssa_name (ltype),
! 					      build2 (MEM_REF, ltype,
! 						      running_off, alias_off));
! 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
! 
! 	      tree newoff = copy_ssa_name (running_off);
! 	      gimple incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
  					  running_off, stride_step);
  	      vect_finish_stmt_generation (stmt, incr, gsi);
  
  	      running_off = newoff;
  	    }
  
! 	  if (slp)
! 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
  	  if (j == 0)
  	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
  	  else
Index: gcc/testsuite/gcc.dg/vect/slp-41.c
===================================================================
*** /dev/null	1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/vect/slp-41.c	2015-05-08 13:26:23.916726022 +0200
***************
*** 0 ****
--- 1,69 ----
+ /* { dg-require-effective-target vect_int } */
+ /* { dg-require-effective-target vect_pack_trunc } */
+ /* { dg-require-effective-target vect_unpack } */
+ /* { dg-require-effective-target vect_hw_misalign } */
+ 
+ #include "tree-vect.h"
+ 
+ void __attribute__((noinline,noclone))
+ testi (int *p, short *q, int stride, int n)
+ {
+   int i;
+   for (i = 0; i < n; ++i)
+     {
+       q[i*4+0] = p[i*stride+0];
+       q[i*4+1] = p[i*stride+1];
+       q[i*4+2] = p[i*stride+2];
+       q[i*4+3] = p[i*stride+3];
+     }
+ }
+ 
+ void __attribute__((noinline,noclone))
+ testi2 (int *q, short *p, int stride, int n)
+ {
+   int i;
+   for (i = 0; i < n; ++i)
+     {
+       q[i*4+0] = p[i*stride+0];
+       q[i*4+1] = p[i*stride+1];
+       q[i*4+2] = p[i*stride+2];
+       q[i*4+3] = p[i*stride+3];
+     }
+ }
+ 
+ int ia[256];
+ short sa[256];
+ 
+ extern void abort (void);
+ 
+ int main()
+ {
+   int i;
+ 
+   check_vect ();
+ 
+   for (i = 0; i < 256; ++i)
+     {
+       ia[i] = sa[i] = i;
+        __asm__ volatile ("");
+     }
+   testi (ia, sa, 8, 32);
+   for (i = 0; i < 128; ++i)
+     if (sa[i] != ia[(i / 4) * 8 + i % 4])
+       abort ();
+ 
+   for (i = 0; i < 256; ++i)
+     {
+       ia[i] = sa[i] = i;
+        __asm__ volatile ("");
+     }
+   testi2 (ia, sa, 8, 32);
+   for (i = 0; i < 128; ++i)
+     if (ia[i] != sa[(i / 4) * 8 + i % 4])
+       abort ();
+ 
+   return 0;
+ }
+ 
+ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+ /* { dg-final { cleanup-tree-dump "vect" } } */
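
For illustration, the strided SLP group load in testi above is now emitted as
(roughly) one vector load per group instead of group-size scalar loads plus
packing.  A rough sketch in plain C with GCC vector extensions, assuming a
4-element int vector and showing only the load side (this is not actual
vectorizer output):

  typedef int v4si __attribute__ ((vector_size (16)));

  void
  testi_sketch (int *p, short *q, int stride, int n)
  {
    int i;
    for (i = 0; i < n; ++i)
      {
        v4si tmp;
        /* One (possibly unaligned) vector load covers the whole group;
           the pointer then advances by the runtime stride.  */
        __builtin_memcpy (&tmp, p + i * stride, sizeof tmp);
        q[i*4+0] = tmp[0];
        q[i*4+1] = tmp[1];
        q[i*4+2] = tmp[2];
        q[i*4+3] = tmp[3];
      }
  }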

