This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Teach the vectorizer about multiple sizes


This teaches the vectorizer about multiple possible SIMD vector sizes,
as for example supported by x86 with AVX and SSE.  This is the least
intrusive approach to adding support for autovectorizing using AVX
without regressing the set of loops that are vectorized with SSE
(because we'll fall back to SSE if AVX doesn't work).

Bootstrapped and tested on x86_64-unknown-linux-gnu.

I also built SPEC CPU 2006 with -mavx and fixed the fallout: a
bogus builtin used for V8SF -> V8SI vectorization, and our rejecting
shuffle masks (int!) for 32-byte floating-point shuffles (oh well ...).

Ok for trunk?

Thanks,
Richard.

2010-10-06  Richard Guenther  <rguenther@suse.de>

	* tree-vectorizer.h (current_vector_size): Declare.
	* tree-vect-stmts.c (perm_mask_for_reverse): Check if the
	mask vector type is available.
	(get_vectype_for_scalar_type): Rename to ...
	(get_vectype_for_scalar_type_and_size): ... this.  Get a vector
	size argument.
	(get_vectype_for_scalar_type): New wrapper around
	get_vectype_for_scalar_type_and_size using current_vector_size.
	(get_same_sized_vectype): Use get_vectype_for_scalar_type_and_size.
	* tree-vect-loop.c (vect_analyze_loop_2): Split out core part
	of vect_analyze_loop here.
	(vect_analyze_loop): Loop over vector sizes calling vect_analyze_loop_2.
	* tree-vect-slp.c (vect_slp_analyze_bb): Set current_vector_size
	to autodetect.
	* config/i386/i386.c (ix86_vectorize_builtin_conversion): Fix
	V8SF to V8SI conversion builtin.

Index: gcc/tree-vectorizer.h
===================================================================
*** gcc/tree-vectorizer.h.orig	2010-10-06 13:05:14.000000000 +0200
--- gcc/tree-vectorizer.h	2010-10-06 13:25:09.000000000 +0200
*************** extern LOC find_loop_location (struct lo
*** 779,784 ****
--- 779,785 ----
  extern bool vect_can_advance_ivs_p (loop_vec_info);
  
  /* In tree-vect-stmts.c.  */
+ extern unsigned int current_vector_size;
  extern tree get_vectype_for_scalar_type (tree);
  extern tree get_same_sized_vectype (tree, tree);
  extern bool vect_is_simple_use (tree, loop_vec_info, bb_vec_info, gimple *,
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c.orig	2010-10-06 13:05:14.000000000 +0200
--- gcc/tree-vect-stmts.c	2010-10-06 13:25:09.000000000 +0200
*************** perm_mask_for_reverse (tree vectype, tre
*** 3455,3461 ****
  
    mask_type = get_vectype_for_scalar_type (mask_element_type);
    nunits = TYPE_VECTOR_SUBPARTS (vectype);
!   if (TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS (mask_type))
      return NULL;
  
    for (i = 0; i < nunits; i++)
--- 3455,3462 ----
  
    mask_type = get_vectype_for_scalar_type (mask_element_type);
    nunits = TYPE_VECTOR_SUBPARTS (vectype);
!   if (!mask_type
!       || TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS (mask_type))
      return NULL;
  
    for (i = 0; i < nunits; i++)
*************** free_stmt_vec_info (gimple stmt)
*** 4784,4796 ****
  }
  
  
! /* Function get_vectype_for_scalar_type.
  
!    Returns the vector type corresponding to SCALAR_TYPE as supported
     by the target.  */
  
! tree
! get_vectype_for_scalar_type (tree scalar_type)
  {
    enum machine_mode inner_mode = TYPE_MODE (scalar_type);
    enum machine_mode simd_mode;
--- 4785,4797 ----
  }
  
  
! /* Function get_vectype_for_scalar_type_and_size.
  
!    Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
     by the target.  */
  
! static tree
! get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
  {
    enum machine_mode inner_mode = TYPE_MODE (scalar_type);
    enum machine_mode simd_mode;
*************** get_vectype_for_scalar_type (tree scalar
*** 4818,4824 ****
--- 4819,4836 ----
        && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
      return NULL_TREE;
  
+   /* Query the preferred simd mode and if that does not match the
+      requested size get a supported mode based on its mode class.  */
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
+   if (size != 0
+       && size != GET_MODE_SIZE (simd_mode))
+     {
+       simd_mode = mode_for_size (size *  BITS_PER_UNIT,
+ 				 GET_MODE_CLASS (simd_mode), 0);
+       if (simd_mode == BLKmode
+ 	  || !targetm.vector_mode_supported_p (simd_mode))
+ 	return NULL_TREE;
+     }
    nunits = GET_MODE_SIZE (simd_mode) / nbytes;
    if (nunits <= 1)
      return NULL_TREE;
*************** get_vectype_for_scalar_type (tree scalar
*** 4850,4864 ****
    return vectype;
  }
  
  /* Function get_same_sized_vectype
  
     Returns a vector type corresponding to SCALAR_TYPE of size
     VECTOR_TYPE if supported by the target.  */
  
  tree
! get_same_sized_vectype (tree scalar_type, tree vector_type ATTRIBUTE_UNUSED)
  {
!   return get_vectype_for_scalar_type (scalar_type);
  }
  
  /* Function vect_is_simple_use.
--- 4862,4896 ----
    return vectype;
  }
  
+ unsigned int current_vector_size;
+ 
+ /* Function get_vectype_for_scalar_type.
+ 
+    Returns the vector type corresponding to SCALAR_TYPE as supported
+    by the target.  */
+ 
+ tree
+ get_vectype_for_scalar_type (tree scalar_type)
+ {
+   tree vectype;
+   vectype = get_vectype_for_scalar_type_and_size (scalar_type,
+ 						  current_vector_size);
+   if (vectype
+       && current_vector_size == 0)
+     current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
+   return vectype;
+ }
+ 
  /* Function get_same_sized_vectype
  
     Returns a vector type corresponding to SCALAR_TYPE of size
     VECTOR_TYPE if supported by the target.  */
  
  tree
! get_same_sized_vectype (tree scalar_type, tree vector_type)
  {
!   return get_vectype_for_scalar_type_and_size
! 	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
  }
  
  /* Function vect_is_simple_use.
Index: gcc/tree-vect-loop.c
===================================================================
*** gcc/tree-vect-loop.c.orig	2010-10-06 13:05:14.000000000 +0200
--- gcc/tree-vect-loop.c	2010-10-06 13:28:33.000000000 +0200
*************** vect_analyze_loop_operations (loop_vec_i
*** 1371,1411 ****
  }
  
  
! /* Function vect_analyze_loop.
  
     Apply a set of analyses on LOOP, and create a loop_vec_info struct
     for it.  The different analyses will record information in the
     loop_vec_info struct.  */
! loop_vec_info
! vect_analyze_loop (struct loop *loop)
  {
    bool ok, dummy;
-   loop_vec_info loop_vinfo;
    int max_vf = MAX_VECTORIZATION_FACTOR;
    int min_vf = 2;
  
-   if (vect_print_dump_info (REPORT_DETAILS))
-     fprintf (vect_dump, "===== analyze_loop_nest =====");
- 
-   if (loop_outer (loop)
-       && loop_vec_info_for_loop (loop_outer (loop))
-       && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
-     {
-       if (vect_print_dump_info (REPORT_DETAILS))
- 	fprintf (vect_dump, "outer-loop already vectorized.");
-       return NULL;
-     }
- 
-   /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  */
- 
-   loop_vinfo = vect_analyze_loop_form (loop);
-   if (!loop_vinfo)
-     {
-       if (vect_print_dump_info (REPORT_DETAILS))
- 	fprintf (vect_dump, "bad loop form.");
-       return NULL;
-     }
- 
    /* Find all data references in the loop (which correspond to vdefs/vuses)
       and analyze their evolution in the loop.  Also adjust the minimal
       vectorization factor according to the loads and stores.
--- 1371,1388 ----
  }
  
  
! /* Function vect_analyze_loop_2.
  
     Apply a set of analyses on LOOP, and create a loop_vec_info struct
     for it.  The different analyses will record information in the
     loop_vec_info struct.  */
! static bool
! vect_analyze_loop_2 (loop_vec_info loop_vinfo)
  {
    bool ok, dummy;
    int max_vf = MAX_VECTORIZATION_FACTOR;
    int min_vf = 2;
  
    /* Find all data references in the loop (which correspond to vdefs/vuses)
       and analyze their evolution in the loop.  Also adjust the minimal
       vectorization factor according to the loads and stores.
*************** vect_analyze_loop (struct loop *loop)
*** 1418,1425 ****
      {
        if (vect_print_dump_info (REPORT_DETAILS))
  	fprintf (vect_dump, "bad data references.");
!       destroy_loop_vec_info (loop_vinfo, true);
!       return NULL;
      }
  
    /* Classify all cross-iteration scalar data-flow cycles.
--- 1395,1401 ----
      {
        if (vect_print_dump_info (REPORT_DETAILS))
  	fprintf (vect_dump, "bad data references.");
!       return false;
      }
  
    /* Classify all cross-iteration scalar data-flow cycles.
*************** vect_analyze_loop (struct loop *loop)
*** 1436,1443 ****
      {
        if (vect_print_dump_info (REPORT_DETAILS))
  	fprintf (vect_dump, "unexpected pattern.");
!       destroy_loop_vec_info (loop_vinfo, true);
!       return NULL;
      }
  
    /* Analyze data dependences between the data-refs in the loop
--- 1412,1418 ----
      {
        if (vect_print_dump_info (REPORT_DETAILS))
  	fprintf (vect_dump, "unexpected pattern.");
!       return false;
      }
  
    /* Analyze data dependences between the data-refs in the loop
*************** vect_analyze_loop (struct loop *loop)
*** 1451,1458 ****
      {
        if (vect_print_dump_info (REPORT_DETAILS))
  	fprintf (vect_dump, "bad data dependence.");
!       destroy_loop_vec_info (loop_vinfo, true);
!       return NULL;
      }
  
    ok = vect_determine_vectorization_factor (loop_vinfo);
--- 1426,1432 ----
      {
        if (vect_print_dump_info (REPORT_DETAILS))
  	fprintf (vect_dump, "bad data dependence.");
!       return false;
      }
  
    ok = vect_determine_vectorization_factor (loop_vinfo);
*************** vect_analyze_loop (struct loop *loop)
*** 1460,1474 ****
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "can't determine vectorization factor.");
!       destroy_loop_vec_info (loop_vinfo, true);
!       return NULL;
      }
    if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
  	fprintf (vect_dump, "bad data dependence.");
!       destroy_loop_vec_info (loop_vinfo, true);
!       return NULL;
      }
  
    /* Analyze the alignment of the data-refs in the loop.
--- 1434,1446 ----
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "can't determine vectorization factor.");
!       return false;
      }
    if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
  	fprintf (vect_dump, "bad data dependence.");
!       return false;
      }
  
    /* Analyze the alignment of the data-refs in the loop.
*************** vect_analyze_loop (struct loop *loop)
*** 1479,1486 ****
      {
        if (vect_print_dump_info (REPORT_DETAILS))
  	fprintf (vect_dump, "bad data alignment.");
!       destroy_loop_vec_info (loop_vinfo, true);
!       return NULL;
      }
  
    /* Analyze the access patterns of the data-refs in the loop (consecutive,
--- 1451,1457 ----
      {
        if (vect_print_dump_info (REPORT_DETAILS))
  	fprintf (vect_dump, "bad data alignment.");
!       return false;
      }
  
    /* Analyze the access patterns of the data-refs in the loop (consecutive,
*************** vect_analyze_loop (struct loop *loop)
*** 1491,1498 ****
      {
        if (vect_print_dump_info (REPORT_DETAILS))
  	fprintf (vect_dump, "bad data access.");
!       destroy_loop_vec_info (loop_vinfo, true);
!       return NULL;
      }
  
    /* Prune the list of ddrs to be tested at run-time by versioning for alias.
--- 1462,1468 ----
      {
        if (vect_print_dump_info (REPORT_DETAILS))
  	fprintf (vect_dump, "bad data access.");
!       return false;
      }
  
    /* Prune the list of ddrs to be tested at run-time by versioning for alias.
*************** vect_analyze_loop (struct loop *loop)
*** 1504,1511 ****
        if (vect_print_dump_info (REPORT_DETAILS))
  	fprintf (vect_dump, "too long list of versioning for alias "
  			    "run-time tests.");
!       destroy_loop_vec_info (loop_vinfo, true);
!       return NULL;
      }
  
    /* This pass will decide on using loop versioning and/or loop peeling in
--- 1474,1480 ----
        if (vect_print_dump_info (REPORT_DETAILS))
  	fprintf (vect_dump, "too long list of versioning for alias "
  			    "run-time tests.");
!       return false;
      }
  
    /* This pass will decide on using loop versioning and/or loop peeling in
*************** vect_analyze_loop (struct loop *loop)
*** 1516,1523 ****
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "bad data alignment.");
!       destroy_loop_vec_info (loop_vinfo, true);
!       return NULL;
      }
  
    /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
--- 1485,1491 ----
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "bad data alignment.");
!       return false;
      }
  
    /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
*************** vect_analyze_loop (struct loop *loop)
*** 1539,1551 ****
      {
        if (vect_print_dump_info (REPORT_DETAILS))
  	fprintf (vect_dump, "bad operation or unsupported loop bound.");
!       destroy_loop_vec_info (loop_vinfo, true);
        return NULL;
      }
  
!   LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
  
!   return loop_vinfo;
  }
  
  
--- 1507,1576 ----
      {
        if (vect_print_dump_info (REPORT_DETAILS))
  	fprintf (vect_dump, "bad operation or unsupported loop bound.");
!       return false;
!     }
! 
!   return true;
! }
! 
! /* Function vect_analyze_loop.
! 
!    Apply a set of analyses on LOOP, and create a loop_vec_info struct
!    for it.  The different analyses will record information in the
!    loop_vec_info struct.  */
! loop_vec_info
! vect_analyze_loop (struct loop *loop)
! {
!   loop_vec_info loop_vinfo;
!   unsigned int vector_sizes;
! 
!   /* Autodetect first vector size we try.  */
!   current_vector_size = 0;
!   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
! 
!   if (vect_print_dump_info (REPORT_DETAILS))
!     fprintf (vect_dump, "===== analyze_loop_nest =====");
! 
!   if (loop_outer (loop)
!       && loop_vec_info_for_loop (loop_outer (loop))
!       && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
!     {
!       if (vect_print_dump_info (REPORT_DETAILS))
! 	fprintf (vect_dump, "outer-loop already vectorized.");
        return NULL;
      }
  
!   do
!     {
!       /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
!       loop_vinfo = vect_analyze_loop_form (loop);
!       if (!loop_vinfo)
! 	{
! 	  if (vect_print_dump_info (REPORT_DETAILS))
! 	    fprintf (vect_dump, "bad loop form.");
! 	  return NULL;
! 	}
! 
!       if (vect_analyze_loop_2 (loop_vinfo))
! 	{
! 	  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
  
! 	  return loop_vinfo;
! 	}
! 
!       destroy_loop_vec_info (loop_vinfo, true);
! 
!       vector_sizes &= ~current_vector_size;
!       if (vector_sizes == 0)
! 	return NULL;
! 
!       /* Try the next biggest vector size.  */
!       current_vector_size = 1 << floor_log2 (vector_sizes);
!       if (vect_print_dump_info (REPORT_DETAILS))
! 	fprintf (vect_dump, "***** Re-trying analysis with "
! 		 "vector size %d\n", current_vector_size);
!     }
!   while (1);
  }
  
  
Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c.orig	2010-10-06 13:05:14.000000000 +0200
--- gcc/tree-vect-slp.c	2010-10-06 13:25:09.000000000 +0200
*************** vect_slp_analyze_bb (basic_block bb)
*** 1643,1648 ****
--- 1643,1649 ----
    int max_vf = MAX_VECTORIZATION_FACTOR;
    bool data_dependence_in_bb = false;
  
+   current_vector_size = 0;
  
    if (vect_print_dump_info (REPORT_DETAILS))
      fprintf (vect_dump, "===vect_slp_analyze_bb===\n");


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]