[PATCH] Vectorize strided group loads
Richard Biener
rguenther@suse.de
Fri May 8 15:07:00 GMT 2015
Currently the vectorizer forces unrolling for grouped loads that have
a non-constant DR_STEP, forcing the elements to be loaded individually
via the strided-load support.  The following patch enhances that
machinery to handle groups used in SLP that have a non-constant
DR_STEP, avoiding the excessive unrolling (and (un-)packing).
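
To illustrate, here is a minimal sketch of the kind of loop this
covers, modeled on the new gcc.dg/vect/slp-41.c testcase below (the
function name is illustrative only): the four loads from p form an
interleaving group whose step, stride * sizeof (int), is only known
at runtime.

  void
  copy_group (int *p, short *q, int stride, int n)
  {
    int i;
    for (i = 0; i < n; ++i)
      {
        /* SLP group of four loads with a runtime stride: DR_STEP is
           stride * sizeof (int), not an INTEGER_CST.  With this patch
           the group can be loaded with (sub-)vector strided loads
           instead of being fully unrolled.  */
        q[i*4+0] = p[i*stride+0];
        q[i*4+1] = p[i*stride+1];
        q[i*4+2] = p[i*stride+2];
        q[i*4+3] = p[i*stride+3];
      }
  }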
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
Richard.
2015-05-08 Richard Biener <rguenther@suse.de>
* tree-vect-data-refs.c (vect_compute_data_ref_alignment):
Handle strided group loads.
(vect_verify_datarefs_alignment): Likewise.
(vect_enhance_data_refs_alignment): Likewise.
(vect_analyze_group_access): Likewise.
(vect_analyze_data_ref_access): Likewise.
(vect_analyze_data_ref_accesses): Likewise.
* tree-vect-stmts.c (vect_model_load_cost): Likewise.
(vectorizable_load): Likewise.
* gcc.dg/vect/slp-41.c: New testcase.
Index: gcc/tree-vect-data-refs.c
===================================================================
*** gcc/tree-vect-data-refs.c.orig 2015-05-08 13:24:31.797746925 +0200
--- gcc/tree-vect-data-refs.c 2015-05-08 13:26:23.839725349 +0200
*************** vect_compute_data_ref_alignment (struct
*** 671,677 ****
tree vectype;
tree base, base_addr;
bool base_aligned;
! tree misalign;
tree aligned_to;
unsigned HOST_WIDE_INT alignment;
--- 671,677 ----
tree vectype;
tree base, base_addr;
bool base_aligned;
! tree misalign = NULL_TREE;
tree aligned_to;
unsigned HOST_WIDE_INT alignment;
*************** vect_compute_data_ref_alignment (struct
*** 687,696 ****
/* Strided loads perform only component accesses, misalignment information
is irrelevant for them. */
! if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
return true;
! misalign = DR_INIT (dr);
aligned_to = DR_ALIGNED_TO (dr);
base_addr = DR_BASE_ADDRESS (dr);
vectype = STMT_VINFO_VECTYPE (stmt_info);
--- 687,698 ----
/* Strided loads perform only component accesses, misalignment information
is irrelevant for them. */
! if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
! && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
return true;
! if (tree_fits_shwi_p (DR_STEP (dr)))
! misalign = DR_INIT (dr);
aligned_to = DR_ALIGNED_TO (dr);
base_addr = DR_BASE_ADDRESS (dr);
vectype = STMT_VINFO_VECTYPE (stmt_info);
*************** vect_compute_data_ref_alignment (struct
*** 704,712 ****
if (loop && nested_in_vect_loop_p (loop, stmt))
{
tree step = DR_STEP (dr);
- HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
! if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
--- 706,714 ----
if (loop && nested_in_vect_loop_p (loop, stmt))
{
tree step = DR_STEP (dr);
! if (tree_fits_shwi_p (step)
! && tree_to_shwi (step) % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
*************** vect_compute_data_ref_alignment (struct
*** 732,740 ****
if (!loop)
{
tree step = DR_STEP (dr);
- HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
! if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
--- 734,742 ----
if (!loop)
{
tree step = DR_STEP (dr);
! if (tree_fits_shwi_p (step)
! && tree_to_shwi (step) % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
*************** vect_verify_datarefs_alignment (loop_vec
*** 964,970 ****
/* Strided loads perform only component accesses, alignment is
irrelevant for them. */
! if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
continue;
supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
--- 966,973 ----
/* Strided loads perform only component accesses, alignment is
irrelevant for them. */
! if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
! && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
continue;
supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
*************** vect_enhance_data_refs_alignment (loop_v
*** 1431,1437 ****
/* Strided loads perform only component accesses, alignment is
irrelevant for them. */
! if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
continue;
supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
--- 1434,1441 ----
/* Strided loads perform only component accesses, alignment is
irrelevant for them. */
! if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
! && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
continue;
supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
*************** vect_enhance_data_refs_alignment (loop_v
*** 1723,1729 ****
/* Strided loads perform only component accesses, alignment is
irrelevant for them. */
! if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
continue;
save_misalignment = DR_MISALIGNMENT (dr);
--- 1727,1734 ----
/* Strided loads perform only component accesses, alignment is
irrelevant for them. */
! if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
! && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
continue;
save_misalignment = DR_MISALIGNMENT (dr);
*************** vect_enhance_data_refs_alignment (loop_v
*** 1841,1850 ****
&& GROUP_FIRST_ELEMENT (stmt_info) != stmt))
continue;
- /* Strided loads perform only component accesses, alignment is
- irrelevant for them. */
if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
! continue;
supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
--- 1846,1860 ----
&& GROUP_FIRST_ELEMENT (stmt_info) != stmt))
continue;
if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
! {
! /* Strided loads perform only component accesses, alignment is
! irrelevant for them. */
! if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
! continue;
! do_versioning = false;
! break;
! }
supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
*************** vect_analyze_group_access (struct data_r
*** 2057,2063 ****
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
! HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
HOST_WIDE_INT groupsize, last_accessed_element = 1;
bool slp_impossible = false;
struct loop *loop = NULL;
--- 2067,2073 ----
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
! HOST_WIDE_INT dr_step = -1;
HOST_WIDE_INT groupsize, last_accessed_element = 1;
bool slp_impossible = false;
struct loop *loop = NULL;
*************** vect_analyze_group_access (struct data_r
*** 2067,2073 ****
/* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
size of the interleaving group (including gaps). */
! groupsize = absu_hwi (dr_step) / type_size;
/* Not consecutive access is possible only if it is a part of interleaving. */
if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
--- 2077,2089 ----
/* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
size of the interleaving group (including gaps). */
! if (tree_fits_shwi_p (step))
! {
! dr_step = tree_to_shwi (step);
! groupsize = absu_hwi (dr_step) / type_size;
! }
! else
! groupsize = 0;
/* Not consecutive access is possible only if it is a part of interleaving. */
if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
*************** vect_analyze_group_access (struct data_r
*** 2142,2148 ****
tree prev_init = DR_INIT (data_ref);
gimple prev = stmt;
HOST_WIDE_INT diff, gaps = 0;
- unsigned HOST_WIDE_INT count_in_bytes;
while (next)
{
--- 2158,2163 ----
*************** vect_analyze_group_access (struct data_r
*** 2211,2240 ****
count++;
}
! /* COUNT is the number of accesses found, we multiply it by the size of
! the type to get COUNT_IN_BYTES. */
! count_in_bytes = type_size * count;
!
! /* Check that the size of the interleaving (including gaps) is not
! greater than STEP. */
! if (dr_step != 0
! && absu_hwi (dr_step) < count_in_bytes + gaps * type_size)
! {
! if (dump_enabled_p ())
! {
! dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
! "interleaving size is greater than step for ");
! dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
! DR_REF (dr));
! dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
! }
! return false;
! }
! /* Check that the size of the interleaving is equal to STEP for stores,
i.e., that there are no gaps. */
! if (dr_step != 0
! && absu_hwi (dr_step) != count_in_bytes)
{
if (DR_IS_READ (dr))
{
--- 2226,2237 ----
count++;
}
! if (groupsize == 0)
! groupsize = count + gaps;
! /* Check that the size of the interleaving is equal to count for stores,
i.e., that there are no gaps. */
! if (groupsize != count)
{
if (DR_IS_READ (dr))
{
*************** vect_analyze_group_access (struct data_r
*** 2253,2278 ****
}
}
- /* Check that STEP is a multiple of type size. */
- if (dr_step != 0
- && (dr_step % type_size) != 0)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "step is not a multiple of type size: step ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, step);
- dump_printf (MSG_MISSED_OPTIMIZATION, " size ");
- dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- TYPE_SIZE_UNIT (scalar_type));
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- return false;
- }
-
- if (groupsize == 0)
- groupsize = count + gaps;
-
GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
--- 2250,2255 ----
*************** vect_analyze_data_ref_access (struct dat
*** 2392,2400 ****
return false;
}
/* Assume this is a DR handled by non-constant strided load case. */
if (TREE_CODE (step) != INTEGER_CST)
! return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
/* Not consecutive access - check if it's a part of interleaving group. */
return vect_analyze_group_access (dr);
--- 2369,2380 ----
return false;
}
+
/* Assume this is a DR handled by non-constant strided load case. */
if (TREE_CODE (step) != INTEGER_CST)
! return (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
! && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
! || vect_analyze_group_access (dr)));
/* Not consecutive access - check if it's a part of interleaving group. */
return vect_analyze_group_access (dr);
*************** vect_analyze_data_ref_accesses (loop_vec
*** 2596,2610 ****
|| !gimple_assign_single_p (DR_STMT (drb)))
break;
! /* Check that the data-refs have the same constant size and step. */
tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
if (!tree_fits_uhwi_p (sza)
|| !tree_fits_uhwi_p (szb)
! || !tree_int_cst_equal (sza, szb)
! || !tree_fits_shwi_p (DR_STEP (dra))
! || !tree_fits_shwi_p (DR_STEP (drb))
! || !tree_int_cst_equal (DR_STEP (dra), DR_STEP (drb)))
break;
/* Do not place the same access in the interleaving chain twice. */
--- 2576,2591 ----
|| !gimple_assign_single_p (DR_STMT (drb)))
break;
! /* Check that the data-refs have the same constant size. */
tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
if (!tree_fits_uhwi_p (sza)
|| !tree_fits_uhwi_p (szb)
! || !tree_int_cst_equal (sza, szb))
! break;
!
! /* Check that the data-refs have the same step. */
! if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
break;
/* Do not place the same access in the interleaving chain twice. */
*************** vect_analyze_data_ref_accesses (loop_vec
*** 2637,2647 ****
!= type_size_a))
break;
! /* The step (if not zero) is greater than the difference between
! data-refs' inits. This splits groups into suitable sizes. */
! HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
! if (step != 0 && step <= (init_b - init_a))
! break;
if (dump_enabled_p ())
{
--- 2618,2632 ----
!= type_size_a))
break;
! /* If the step (if not zero or non-constant) is greater than the
! difference between data-refs' inits this splits groups into
! suitable sizes. */
! if (tree_fits_shwi_p (DR_STEP (dra)))
! {
! HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
! if (step != 0 && step <= (init_b - init_a))
! break;
! }
if (dump_enabled_p ())
{
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c.orig 2015-05-08 13:24:31.797746925 +0200
--- gcc/tree-vect-stmts.c 2015-05-08 13:28:00.920573458 +0200
*************** vect_model_load_cost (stmt_vec_info stmt
*** 1112,1118 ****
equivalent to the cost of GROUP_SIZE separate loads. If a grouped
access is instead being provided by a load-and-permute operation,
include the cost of the permutes. */
! if (!load_lanes_p && group_size > 1)
{
/* Uses an even and odd extract operations or shuffle operations
for each needed permute. */
--- 1112,1119 ----
equivalent to the cost of GROUP_SIZE separate loads. If a grouped
access is instead being provided by a load-and-permute operation,
include the cost of the permutes. */
! if (!load_lanes_p && group_size > 1
! && !STMT_VINFO_STRIDE_LOAD_P (stmt_info))
{
/* Uses an even and odd extract operations or shuffle operations
for each needed permute. */
*************** vect_model_load_cost (stmt_vec_info stmt
*** 1127,1141 ****
}
/* The loads themselves. */
! if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
{
/* N scalar loads plus gathering them into a vector. */
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
inside_cost += record_stmt_cost (body_cost_vec,
ncopies * TYPE_VECTOR_SUBPARTS (vectype),
scalar_load, stmt_info, 0, vect_body);
- inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
- stmt_info, 0, vect_body);
}
else
vect_get_load_cost (first_dr, ncopies,
--- 1128,1141 ----
}
/* The loads themselves. */
! if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)
! && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
{
/* N scalar loads plus gathering them into a vector. */
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
inside_cost += record_stmt_cost (body_cost_vec,
ncopies * TYPE_VECTOR_SUBPARTS (vectype),
scalar_load, stmt_info, 0, vect_body);
}
else
vect_get_load_cost (first_dr, ncopies,
*************** vect_model_load_cost (stmt_vec_info stmt
*** 1143,1148 ****
--- 1143,1151 ----
|| group_size > 1 || slp_node),
&inside_cost, &prologue_cost,
prologue_cost_vec, body_cost_vec, true);
+ if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+ inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
+ stmt_info, 0, vect_body);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
*************** vectorizable_load (gimple stmt, gimple_s
*** 5657,5663 ****
gimple ptr_incr = NULL;
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
int ncopies;
! int i, j, group_size, group_gap;
tree msq = NULL_TREE, lsq;
tree offset = NULL_TREE;
tree byte_offset = NULL_TREE;
--- 5660,5666 ----
gimple ptr_incr = NULL;
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
int ncopies;
! int i, j, group_size = -1, group_gap;
tree msq = NULL_TREE, lsq;
tree offset = NULL_TREE;
tree byte_offset = NULL_TREE;
*************** vectorizable_load (gimple stmt, gimple_s
*** 5790,5798 ****
return false;
}
! if (!slp && !PURE_SLP_STMT (stmt_info))
{
- group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
if (vect_load_lanes_supported (vectype, group_size))
load_lanes_p = true;
else if (!vect_grouped_load_supported (vectype, group_size))
--- 5793,5803 ----
return false;
}
! group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
! if (!slp
! && !PURE_SLP_STMT (stmt_info)
! && !STMT_VINFO_STRIDE_LOAD_P (stmt_info))
{
if (vect_load_lanes_supported (vectype, group_size))
load_lanes_p = true;
else if (!vect_grouped_load_supported (vectype, group_size))
*************** vectorizable_load (gimple stmt, gimple_s
*** 5847,5853 ****
}
}
else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
! ;
else
{
negative = tree_int_cst_compare (nested_in_vect_loop
--- 5852,5873 ----
}
}
else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
! {
! if ((grouped_load
! && (slp || PURE_SLP_STMT (stmt_info)))
! && (group_size > nunits
! || nunits % group_size != 0
! /* ??? During analysis phase we are not called with the
! slp node/instance we are in so whether we'll end up
! with a permutation we don't know. Still we don't
! support load permutations. */
! || slp_perm))
! {
! dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
! "unhandled strided group load\n");
! return false;
! }
! }
else
{
negative = tree_int_cst_compare (nested_in_vect_loop
*************** vectorizable_load (gimple stmt, gimple_s
*** 6136,6169 ****
prev_stmt_info = NULL;
running_off = offvar;
alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
for (j = 0; j < ncopies; j++)
{
tree vec_inv;
! vec_alloc (v, nunits);
! for (i = 0; i < nunits; i++)
{
! tree newref, newoff;
! gimple incr;
! newref = build2 (MEM_REF, TREE_TYPE (vectype),
! running_off, alias_off);
!
! newref = force_gimple_operand_gsi (gsi, newref, true,
! NULL_TREE, true,
! GSI_SAME_STMT);
! CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
! newoff = copy_ssa_name (running_off);
! incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
running_off, stride_step);
vect_finish_stmt_generation (stmt, incr, gsi);
running_off = newoff;
}
! vec_inv = build_constructor (vectype, v);
! new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
! new_stmt = SSA_NAME_DEF_STMT (new_temp);
!
if (j == 0)
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
else
--- 6156,6220 ----
prev_stmt_info = NULL;
running_off = offvar;
alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
+ int nloads = nunits;
+ tree ltype = TREE_TYPE (vectype);
+ if (slp)
+ {
+ nloads = nunits / group_size;
+ if (group_size < nunits)
+ ltype = build_vector_type (TREE_TYPE (vectype), group_size);
+ else
+ ltype = vectype;
+ ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
+ ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ gcc_assert (!slp_perm);
+ }
for (j = 0; j < ncopies; j++)
{
tree vec_inv;
! if (nloads > 1)
{
! vec_alloc (v, nloads);
! for (i = 0; i < nloads; i++)
! {
! tree newref, newoff;
! gimple incr;
! newref = build2 (MEM_REF, ltype, running_off, alias_off);
!
! newref = force_gimple_operand_gsi (gsi, newref, true,
! NULL_TREE, true,
! GSI_SAME_STMT);
! CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
! newoff = copy_ssa_name (running_off);
! incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
! running_off, stride_step);
! vect_finish_stmt_generation (stmt, incr, gsi);
!
! running_off = newoff;
! }
!
! vec_inv = build_constructor (vectype, v);
! new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
! new_stmt = SSA_NAME_DEF_STMT (new_temp);
! }
! else
! {
! new_stmt = gimple_build_assign (make_ssa_name (ltype),
! build2 (MEM_REF, ltype,
! running_off, alias_off));
! vect_finish_stmt_generation (stmt, new_stmt, gsi);
!
! tree newoff = copy_ssa_name (running_off);
! gimple incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
running_off, stride_step);
vect_finish_stmt_generation (stmt, incr, gsi);
running_off = newoff;
}
! if (slp)
! SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
if (j == 0)
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
else
Index: gcc/testsuite/gcc.dg/vect/slp-41.c
===================================================================
*** /dev/null 1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/vect/slp-41.c 2015-05-08 13:26:23.916726022 +0200
***************
*** 0 ****
--- 1,69 ----
+ /* { dg-require-effective-target vect_int } */
+ /* { dg-require-effective-target vect_pack_trunc } */
+ /* { dg-require-effective-target vect_unpack } */
+ /* { dg-require-effective-target vect_hw_misalign } */
+
+ #include "tree-vect.h"
+
+ void __attribute__((noinline,noclone))
+ testi (int *p, short *q, int stride, int n)
+ {
+ int i;
+ for (i = 0; i < n; ++i)
+ {
+ q[i*4+0] = p[i*stride+0];
+ q[i*4+1] = p[i*stride+1];
+ q[i*4+2] = p[i*stride+2];
+ q[i*4+3] = p[i*stride+3];
+ }
+ }
+
+ void __attribute__((noinline,noclone))
+ testi2 (int *q, short *p, int stride, int n)
+ {
+ int i;
+ for (i = 0; i < n; ++i)
+ {
+ q[i*4+0] = p[i*stride+0];
+ q[i*4+1] = p[i*stride+1];
+ q[i*4+2] = p[i*stride+2];
+ q[i*4+3] = p[i*stride+3];
+ }
+ }
+
+ int ia[256];
+ short sa[256];
+
+ extern void abort (void);
+
+ int main()
+ {
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < 256; ++i)
+ {
+ ia[i] = sa[i] = i;
+ __asm__ volatile ("");
+ }
+ testi (ia, sa, 8, 32);
+ for (i = 0; i < 128; ++i)
+ if (sa[i] != ia[(i / 4) * 8 + i % 4])
+ abort ();
+
+ for (i = 0; i < 256; ++i)
+ {
+ ia[i] = sa[i] = i;
+ __asm__ volatile ("");
+ }
+ testi2 (ia, sa, 8, 32);
+ for (i = 0; i < 128; ++i)
+ if (ia[i] != sa[(i / 4) * 8 + i % 4])
+ abort ();
+
+ return 0;
+ }
+
+ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+ /* { dg-final { cleanup-tree-dump "vect" } } */