This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Teach the vectorizer about multiple sizes
- From: Richard Guenther <rguenther at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Cc: irar at il dot ibm dot com
- Date: Wed, 6 Oct 2010 14:45:39 +0200 (CEST)
- Subject: [PATCH] Teach the vectorizer about multiple sizes
This teaches the vectorizer about multiple possible SIMD vector sizes,
as for example supported by x86 with AVX and SSE. This is the least
intrusive approach to adding support for autovectorizing using AVX
without regressing the set of loops that are currently vectorized with SSE
(because we'll fall back to SSE if AVX doesn't work).
Bootstrapped and tested on x86_64-unknown-linux-gnu.
I also built SPEC CPU 2006 with -mavx and fixed the fallout: a
bogus builtin used for V8SF -> V8SI vectorization, and our rejecting
shuffle masks (int!) for 32-byte floating-point shuffles (oh well ...).
Ok for trunk?
Thanks,
Richard.
2010-10-06 Richard Guenther <rguenther@suse.de>
* tree-vectorizer.h (current_vector_size): Declare.
* tree-vect-stmts.c (perm_mask_for_reverse): Check if the
mask vector type is available.
(get_vectype_for_scalar_type): Rename to ...
(get_vectype_for_scalar_type_and_size): ... this. Get a vector
size argument.
(get_vectype_for_scalar_type): New wrapper around
get_vectype_for_scalar_type_and_size using current_vector_size.
(get_same_sized_vectype): Use get_vectype_for_scalar_type_and_size.
* tree-vect-loop.c (vect_analyze_loop_2): Split out core part
of vect_analyze_loop here.
(vect_analyze_loop): Loop over vector sizes calling vect_analyze_loop_2.
* tree-vect-slp.c (vect_slp_analyze_bb): Set current_vector_size
to autodetect.
* config/i386/i386.c (ix86_vectorize_builtin_conversion): Fix
V8SF to V8SI conversion builtin.
Index: gcc/tree-vectorizer.h
===================================================================
*** gcc/tree-vectorizer.h.orig 2010-10-06 13:05:14.000000000 +0200
--- gcc/tree-vectorizer.h 2010-10-06 13:25:09.000000000 +0200
*************** extern LOC find_loop_location (struct lo
*** 779,784 ****
--- 779,785 ----
extern bool vect_can_advance_ivs_p (loop_vec_info);
/* In tree-vect-stmts.c. */
+ extern unsigned int current_vector_size;
extern tree get_vectype_for_scalar_type (tree);
extern tree get_same_sized_vectype (tree, tree);
extern bool vect_is_simple_use (tree, loop_vec_info, bb_vec_info, gimple *,
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c.orig 2010-10-06 13:05:14.000000000 +0200
--- gcc/tree-vect-stmts.c 2010-10-06 13:25:09.000000000 +0200
*************** perm_mask_for_reverse (tree vectype, tre
*** 3455,3461 ****
mask_type = get_vectype_for_scalar_type (mask_element_type);
nunits = TYPE_VECTOR_SUBPARTS (vectype);
! if (TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS (mask_type))
return NULL;
for (i = 0; i < nunits; i++)
--- 3455,3462 ----
mask_type = get_vectype_for_scalar_type (mask_element_type);
nunits = TYPE_VECTOR_SUBPARTS (vectype);
! if (!mask_type
! || TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS (mask_type))
return NULL;
for (i = 0; i < nunits; i++)
*************** free_stmt_vec_info (gimple stmt)
*** 4784,4796 ****
}
! /* Function get_vectype_for_scalar_type.
! Returns the vector type corresponding to SCALAR_TYPE as supported
by the target. */
! tree
! get_vectype_for_scalar_type (tree scalar_type)
{
enum machine_mode inner_mode = TYPE_MODE (scalar_type);
enum machine_mode simd_mode;
--- 4785,4797 ----
}
! /* Function get_vectype_for_scalar_type_and_size.
! Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
by the target. */
! static tree
! get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
enum machine_mode inner_mode = TYPE_MODE (scalar_type);
enum machine_mode simd_mode;
*************** get_vectype_for_scalar_type (tree scalar
*** 4818,4824 ****
--- 4819,4836 ----
&& GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
return NULL_TREE;
+ /* Query the preferred simd mode and if that does not match the
+ requested size get a supported mode based on its mode class. */
simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
+ if (size != 0
+ && size != GET_MODE_SIZE (simd_mode))
+ {
+ simd_mode = mode_for_size (size * BITS_PER_UNIT,
+ GET_MODE_CLASS (simd_mode), 0);
+ if (simd_mode == BLKmode
+ || !targetm.vector_mode_supported_p (simd_mode))
+ return NULL_TREE;
+ }
nunits = GET_MODE_SIZE (simd_mode) / nbytes;
if (nunits <= 1)
return NULL_TREE;
*************** get_vectype_for_scalar_type (tree scalar
*** 4850,4864 ****
return vectype;
}
/* Function get_same_sized_vectype
Returns a vector type corresponding to SCALAR_TYPE of size
VECTOR_TYPE if supported by the target. */
tree
! get_same_sized_vectype (tree scalar_type, tree vector_type ATTRIBUTE_UNUSED)
{
! return get_vectype_for_scalar_type (scalar_type);
}
/* Function vect_is_simple_use.
--- 4862,4896 ----
return vectype;
}
+ unsigned int current_vector_size;
+
+ /* Function get_vectype_for_scalar_type.
+
+ Returns the vector type corresponding to SCALAR_TYPE as supported
+ by the target. */
+
+ tree
+ get_vectype_for_scalar_type (tree scalar_type)
+ {
+ tree vectype;
+ vectype = get_vectype_for_scalar_type_and_size (scalar_type,
+ current_vector_size);
+ if (vectype
+ && current_vector_size == 0)
+ current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
+ return vectype;
+ }
+
/* Function get_same_sized_vectype
Returns a vector type corresponding to SCALAR_TYPE of size
VECTOR_TYPE if supported by the target. */
tree
! get_same_sized_vectype (tree scalar_type, tree vector_type)
{
! return get_vectype_for_scalar_type_and_size
! (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}
/* Function vect_is_simple_use.
Index: gcc/tree-vect-loop.c
===================================================================
*** gcc/tree-vect-loop.c.orig 2010-10-06 13:05:14.000000000 +0200
--- gcc/tree-vect-loop.c 2010-10-06 13:28:33.000000000 +0200
*************** vect_analyze_loop_operations (loop_vec_i
*** 1371,1411 ****
}
! /* Function vect_analyze_loop.
Apply a set of analyses on LOOP, and create a loop_vec_info struct
for it. The different analyses will record information in the
loop_vec_info struct. */
! loop_vec_info
! vect_analyze_loop (struct loop *loop)
{
bool ok, dummy;
- loop_vec_info loop_vinfo;
int max_vf = MAX_VECTORIZATION_FACTOR;
int min_vf = 2;
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "===== analyze_loop_nest =====");
-
- if (loop_outer (loop)
- && loop_vec_info_for_loop (loop_outer (loop))
- && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "outer-loop already vectorized.");
- return NULL;
- }
-
- /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
-
- loop_vinfo = vect_analyze_loop_form (loop);
- if (!loop_vinfo)
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "bad loop form.");
- return NULL;
- }
-
/* Find all data references in the loop (which correspond to vdefs/vuses)
and analyze their evolution in the loop. Also adjust the minimal
vectorization factor according to the loads and stores.
--- 1371,1388 ----
}
! /* Function vect_analyze_loop_2.
Apply a set of analyses on LOOP, and create a loop_vec_info struct
for it. The different analyses will record information in the
loop_vec_info struct. */
! static bool
! vect_analyze_loop_2 (loop_vec_info loop_vinfo)
{
bool ok, dummy;
int max_vf = MAX_VECTORIZATION_FACTOR;
int min_vf = 2;
/* Find all data references in the loop (which correspond to vdefs/vuses)
and analyze their evolution in the loop. Also adjust the minimal
vectorization factor according to the loads and stores.
*************** vect_analyze_loop (struct loop *loop)
*** 1418,1425 ****
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "bad data references.");
! destroy_loop_vec_info (loop_vinfo, true);
! return NULL;
}
/* Classify all cross-iteration scalar data-flow cycles.
--- 1395,1401 ----
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "bad data references.");
! return false;
}
/* Classify all cross-iteration scalar data-flow cycles.
*************** vect_analyze_loop (struct loop *loop)
*** 1436,1443 ****
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "unexpected pattern.");
! destroy_loop_vec_info (loop_vinfo, true);
! return NULL;
}
/* Analyze data dependences between the data-refs in the loop
--- 1412,1418 ----
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "unexpected pattern.");
! return false;
}
/* Analyze data dependences between the data-refs in the loop
*************** vect_analyze_loop (struct loop *loop)
*** 1451,1458 ****
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "bad data dependence.");
! destroy_loop_vec_info (loop_vinfo, true);
! return NULL;
}
ok = vect_determine_vectorization_factor (loop_vinfo);
--- 1426,1432 ----
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "bad data dependence.");
! return false;
}
ok = vect_determine_vectorization_factor (loop_vinfo);
*************** vect_analyze_loop (struct loop *loop)
*** 1460,1474 ****
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "can't determine vectorization factor.");
! destroy_loop_vec_info (loop_vinfo, true);
! return NULL;
}
if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "bad data dependence.");
! destroy_loop_vec_info (loop_vinfo, true);
! return NULL;
}
/* Analyze the alignment of the data-refs in the loop.
--- 1434,1446 ----
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "can't determine vectorization factor.");
! return false;
}
if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "bad data dependence.");
! return false;
}
/* Analyze the alignment of the data-refs in the loop.
*************** vect_analyze_loop (struct loop *loop)
*** 1479,1486 ****
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "bad data alignment.");
! destroy_loop_vec_info (loop_vinfo, true);
! return NULL;
}
/* Analyze the access patterns of the data-refs in the loop (consecutive,
--- 1451,1457 ----
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "bad data alignment.");
! return false;
}
/* Analyze the access patterns of the data-refs in the loop (consecutive,
*************** vect_analyze_loop (struct loop *loop)
*** 1491,1498 ****
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "bad data access.");
! destroy_loop_vec_info (loop_vinfo, true);
! return NULL;
}
/* Prune the list of ddrs to be tested at run-time by versioning for alias.
--- 1462,1468 ----
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "bad data access.");
! return false;
}
/* Prune the list of ddrs to be tested at run-time by versioning for alias.
*************** vect_analyze_loop (struct loop *loop)
*** 1504,1511 ****
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "too long list of versioning for alias "
"run-time tests.");
! destroy_loop_vec_info (loop_vinfo, true);
! return NULL;
}
/* This pass will decide on using loop versioning and/or loop peeling in
--- 1474,1480 ----
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "too long list of versioning for alias "
"run-time tests.");
! return false;
}
/* This pass will decide on using loop versioning and/or loop peeling in
*************** vect_analyze_loop (struct loop *loop)
*** 1516,1523 ****
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "bad data alignment.");
! destroy_loop_vec_info (loop_vinfo, true);
! return NULL;
}
/* Check the SLP opportunities in the loop, analyze and build SLP trees. */
--- 1485,1491 ----
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "bad data alignment.");
! return false;
}
/* Check the SLP opportunities in the loop, analyze and build SLP trees. */
*************** vect_analyze_loop (struct loop *loop)
*** 1539,1551 ****
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "bad operation or unsupported loop bound.");
! destroy_loop_vec_info (loop_vinfo, true);
return NULL;
}
! LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
! return loop_vinfo;
}
--- 1507,1576 ----
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "bad operation or unsupported loop bound.");
! return false;
! }
!
! return true;
! }
!
! /* Function vect_analyze_loop.
!
! Apply a set of analyses on LOOP, and create a loop_vec_info struct
! for it. The different analyses will record information in the
! loop_vec_info struct. */
! loop_vec_info
! vect_analyze_loop (struct loop *loop)
! {
! loop_vec_info loop_vinfo;
! unsigned int vector_sizes;
!
! /* Autodetect first vector size we try. */
! current_vector_size = 0;
! vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
!
! if (vect_print_dump_info (REPORT_DETAILS))
! fprintf (vect_dump, "===== analyze_loop_nest =====");
!
! if (loop_outer (loop)
! && loop_vec_info_for_loop (loop_outer (loop))
! && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
! {
! if (vect_print_dump_info (REPORT_DETAILS))
! fprintf (vect_dump, "outer-loop already vectorized.");
return NULL;
}
! do
! {
! /* Check the CFG characteristics of the loop (nesting, entry/exit). */
! loop_vinfo = vect_analyze_loop_form (loop);
! if (!loop_vinfo)
! {
! if (vect_print_dump_info (REPORT_DETAILS))
! fprintf (vect_dump, "bad loop form.");
! return NULL;
! }
!
! if (vect_analyze_loop_2 (loop_vinfo))
! {
! LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
! return loop_vinfo;
! }
!
! destroy_loop_vec_info (loop_vinfo, true);
!
! vector_sizes &= ~current_vector_size;
! if (vector_sizes == 0)
! return NULL;
!
! /* Try the next biggest vector size. */
! current_vector_size = 1 << floor_log2 (vector_sizes);
! if (vect_print_dump_info (REPORT_DETAILS))
! fprintf (vect_dump, "***** Re-trying analysis with "
! "vector size %d\n", current_vector_size);
! }
! while (1);
}
Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c.orig 2010-10-06 13:05:14.000000000 +0200
--- gcc/tree-vect-slp.c 2010-10-06 13:25:09.000000000 +0200
*************** vect_slp_analyze_bb (basic_block bb)
*** 1643,1648 ****
--- 1643,1649 ----
int max_vf = MAX_VECTORIZATION_FACTOR;
bool data_dependence_in_bb = false;
+ current_vector_size = 0;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "===vect_slp_analyze_bb===\n");