[PATCH] Fix dr_explicit_realign_optimized handling in the vectorizer (PR tree-optimization/63341)

Richard Biener rguenther@suse.de
Thu Sep 25 07:40:00 GMT 2014


On Thu, 25 Sep 2014, Jakub Jelinek wrote:

> Hi!
> 
> As the testcases show, dr_explicit_realign_optimized (used on PowerPC/SPU
> only) misbehaves if the base_address is in between 1 and vector element size - 1
> modulo vector size.
> The problem is that it wants to add a bias to base_addr such that
> base_addr & ~vector_size
> (base_addr + bias) & ~vector_size
> are adjacent vector_size memory slots, but vect_create_data_ref_ptr
> takes offset counted in vector elements and in the end multiplies that
> by vector element size, so we end up actually with:
> (base_addr + ((vector_size / vector_element_size) * vector_element_size) & ~vector_size
> which unfortunately is not enough, e.g. in the testcase
> base_addr is 1 moduo vector_size, vector_size 16 and vector_element_size 2,
> so we have
> base_addr & ~16
> (base_addr + 14) & ~16
> instead of the desired
> (base_addr + 15) & ~16
> and 1 & ~16 and (1 + 14) & ~16 is the same address.
> 
> Fixed by passing another offset, measured in bytes (for the negative case
> which are the only 3 other cases which pass any offset down we want it to be
> as is), bootstrapped/regtested on x86_64-linux and i686-linux and on
> {x86_64,i686,powerpc{,64},s390{,x}}-linux on the 4.8 branch.
> 
> Ok for trunk/4.9/4.8?

Ok.

Thanks,
Richard.

> 2014-09-25  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/63341
> 	* tree-vectorizer.h (vect_create_data_ref_ptr,
> 	vect_create_addr_base_for_vector_ref): Add another tree argument
> 	defaulting to NULL_TREE.
> 	* tree-vect-data-refs.c (vect_create_data_ref_ptr): Add byte_offset
> 	argument, pass it down to vect_create_addr_base_for_vector_ref.
> 	(vect_create_addr_base_for_vector_ref): Add byte_offset argument,
> 	add that to base_offset too if non-NULL.
> 	* tree-vect-stmts.c (vectorizable_load): Add byte_offset variable,
> 	for dr_explicit_realign_optimized set it to vector byte size
> 	- 1 instead of setting offset, pass byte_offset down to
> 	vect_create_data_ref_ptr.
> 
> 	* gcc.dg/vect/pr63341-1.c: New test.
> 	* gcc.dg/vect/pr63341-2.c: New test.
> 
> --- gcc/tree-vectorizer.h.jj	2014-09-01 09:43:56.000000000 +0200
> +++ gcc/tree-vectorizer.h	2014-09-23 15:19:28.302484227 +0200
> @@ -1061,7 +1061,8 @@ extern bool vect_analyze_data_refs (loop
>  				    unsigned *);
>  extern tree vect_create_data_ref_ptr (gimple, tree, struct loop *, tree,
>  				      tree *, gimple_stmt_iterator *,
> -				      gimple *, bool, bool *);
> +				      gimple *, bool, bool *,
> +				      tree = NULL_TREE);
>  extern tree bump_vector_ptr (tree, gimple, gimple_stmt_iterator *, gimple, tree);
>  extern tree vect_create_destination_var (tree, tree);
>  extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
> @@ -1078,7 +1079,8 @@ extern void vect_transform_grouped_load
>  extern void vect_record_grouped_load_vectors (gimple, vec<tree> );
>  extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
>  extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *,
> -                                                  tree, struct loop *);
> +						  tree, struct loop *,
> +						  tree = NULL_TREE);
>  
>  /* In tree-vect-loop.c.  */
>  /* FORNOW: Used in tree-parloops.c.  */
> --- gcc/tree-vect-data-refs.c.jj	2014-09-18 15:48:22.000000000 +0200
> +++ gcc/tree-vect-data-refs.c	2014-09-23 15:11:06.163061112 +0200
> @@ -3860,6 +3860,9 @@ vect_get_new_vect_var (tree type, enum v
>  	    is as follows:
>  	    if LOOP=i_loop:	&in		(relative to i_loop)
>  	    if LOOP=j_loop: 	&in+i*2B	(relative to j_loop)
> +   BYTE_OFFSET: Optional, defaulted to NULL.  If supplied, it is added to the
> +	    initial address.  Unlike OFFSET, which is number of elements to
> +	    be added, BYTE_OFFSET is measured in bytes.
>  
>     Output:
>     1. Return an SSA_NAME whose value is the address of the memory location of
> @@ -3873,7 +3876,8 @@ tree
>  vect_create_addr_base_for_vector_ref (gimple stmt,
>  				      gimple_seq *new_stmt_list,
>  				      tree offset,
> -				      struct loop *loop)
> +				      struct loop *loop,
> +				      tree byte_offset)
>  {
>    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
>    struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
> @@ -3926,6 +3930,12 @@ vect_create_addr_base_for_vector_ref (gi
>        base_offset = fold_build2 (PLUS_EXPR, sizetype,
>  				 base_offset, offset);
>      }
> +  if (byte_offset)
> +    {
> +      byte_offset = fold_convert (sizetype, byte_offset);
> +      base_offset = fold_build2 (PLUS_EXPR, sizetype,
> +				 base_offset, byte_offset);
> +    }
>  
>    /* base + base_offset */
>    if (loop_vinfo)
> @@ -3983,6 +3993,10 @@ vect_create_addr_base_for_vector_ref (gi
>     5. BSI: location where the new stmts are to be placed if there is no loop
>     6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
>          pointing to the initial address.
> +   7. BYTE_OFFSET (optional, defaults to NULL): a byte offset to be added
> +	to the initial address accessed by the data-ref in STMT.  This is
> +	similar to OFFSET, but OFFSET is counted in elements, while BYTE_OFFSET
> +	in bytes.
>  
>     Output:
>     1. Declare a new ptr to vector_type, and have it point to the base of the
> @@ -3996,6 +4010,8 @@ vect_create_addr_base_for_vector_ref (gi
>           initial_address = &a[init];
>        if OFFSET is supplied:
>           initial_address = &a[init + OFFSET];
> +      if BYTE_OFFSET is supplied:
> +	 initial_address = &a[init] + BYTE_OFFSET;
>  
>        Return the initial_address in INITIAL_ADDRESS.
>  
> @@ -4013,7 +4029,7 @@ tree
>  vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
>  			  tree offset, tree *initial_address,
>  			  gimple_stmt_iterator *gsi, gimple *ptr_incr,
> -			  bool only_init, bool *inv_p)
> +			  bool only_init, bool *inv_p, tree byte_offset)
>  {
>    const char *base_name;
>    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> @@ -4156,10 +4172,10 @@ vect_create_data_ref_ptr (gimple stmt, t
>    /* (2) Calculate the initial address of the aggregate-pointer, and set
>       the aggregate-pointer to point to it before the loop.  */
>  
> -  /* Create: (&(base[init_val+offset]) in the loop preheader.  */
> +  /* Create: (&(base[init_val+offset]+byte_offset) in the loop preheader.  */
>  
>    new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
> -                                                   offset, loop);
> +						   offset, loop, byte_offset);
>    if (new_stmt_list)
>      {
>        if (pe)
> --- gcc/tree-vect-stmts.c.jj	2014-08-01 09:24:12.000000000 +0200
> +++ gcc/tree-vect-stmts.c	2014-09-23 15:09:31.832553172 +0200
> @@ -5600,6 +5600,7 @@ vectorizable_load (gimple stmt, gimple_s
>    int i, j, group_size, group_gap;
>    tree msq = NULL_TREE, lsq;
>    tree offset = NULL_TREE;
> +  tree byte_offset = NULL_TREE;
>    tree realignment_token = NULL_TREE;
>    gimple phi = NULL;
>    vec<tree> dr_chain = vNULL;
> @@ -6261,7 +6262,8 @@ vectorizable_load (gimple stmt, gimple_s
>        if (alignment_support_scheme == dr_explicit_realign_optimized)
>  	{
>  	  phi = SSA_NAME_DEF_STMT (msq);
> -	  offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
> +	  byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
> +				    size_one_node);
>  	}
>      }
>    else
> @@ -6302,7 +6304,8 @@ vectorizable_load (gimple stmt, gimple_s
>  	    dataref_ptr
>  	      = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
>  					  offset, &dummy, gsi, &ptr_incr,
> -					  simd_lane_access_p, &inv_p);
> +					  simd_lane_access_p, &inv_p,
> +					  byte_offset);
>  	}
>        else if (dataref_offset)
>  	dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
> --- gcc/testsuite/gcc.dg/vect/pr63341-1.c.jj	2014-09-23 15:27:24.658025472 +0200
> +++ gcc/testsuite/gcc.dg/vect/pr63341-1.c	2014-09-23 15:28:16.645757979 +0200
> @@ -0,0 +1,32 @@
> +/* PR tree-optimization/63341 */
> +/* { dg-do run } */
> +
> +#include "tree-vect.h"
> +
> +typedef union U { unsigned short s; unsigned char c; } __attribute__((packed)) U;
> +struct S { char e __attribute__((aligned (64))); U s[32]; };
> +struct S t = {0, {{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8},
> +		  {9}, {10}, {11}, {12}, {13}, {14}, {15}, {16},
> +		  {17}, {18}, {19}, {20}, {21}, {22}, {23}, {24},
> +		  {25}, {26}, {27}, {28}, {29}, {30}, {31}, {32}}};
> +unsigned short d[32] = { 1 };
> +
> +__attribute__((noinline, noclone)) void
> +foo ()
> +{
> +  int i;
> +  for (i = 0; i < 32; i++)
> +    d[i] = t.s[i].s;
> +  if (__builtin_memcmp (d, t.s, sizeof d))
> +    abort ();
> +}
> +
> +int
> +main ()
> +{
> +  check_vect ();
> +  foo ();
> +  return 0;
> +}
> +
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> --- gcc/testsuite/gcc.dg/vect/pr63341-2.c.jj	2014-09-23 15:27:27.906013904 +0200
> +++ gcc/testsuite/gcc.dg/vect/pr63341-2.c	2014-09-23 15:30:29.874081634 +0200
> @@ -0,0 +1,35 @@
> +/* PR tree-optimization/63341 */
> +/* { dg-do run } */
> +
> +#include "tree-vect.h"
> +
> +typedef union U { unsigned short s; unsigned char c; } __attribute__((packed)) U;
> +struct S { char e __attribute__((aligned (64))); U s[32]; };
> +struct S t = {0, {{0x5010}, {0x5111}, {0x5212}, {0x5313}, {0x5414}, {0x5515}, {0x5616}, {0x5717},
> +		  {0x5818}, {0x5919}, {0x5a1a}, {0x5b1b}, {0x5c1c}, {0x5d1d}, {0x5e1e}, {0x5f1f},
> +		  {0x6020}, {0x6121}, {0x6222}, {0x6323}, {0x6424}, {0x6525}, {0x6626}, {0x6727},
> +		  {0x6828}, {0x6929}, {0x6a2a}, {0x6b2b}, {0x6c2c}, {0x6d2d}, {0x6e2e}, {0x6f2f}}};
> +unsigned short d[32] = { 1 };
> +
> +__attribute__((noinline, noclone)) void
> +foo ()
> +{
> +  int i;
> +  for (i = 0; i < 32; i++)
> +    d[i] = t.s[i].s + 4;
> +  for (i = 0; i < 32; i++)
> +    if (d[i] != t.s[i].s + 4)
> +      abort ();
> +    else
> +      asm volatile ("" : : : "memory");
> +}
> +
> +int
> +main ()
> +{
> +  check_vect ();
> +  foo ();
> +  return 0;
> +}
> +
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> 
> 	Jakub
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer



More information about the Gcc-patches mailing list