[PATCH][2/n] Fix PR50444, SRA and misaligned accesses

Richard Guenther rguenther@suse.de
Tue Jan 24 13:55:00 GMT 2012


This creates a get_pointer_alignment_1 function, similar to
get_object_alignment_1 for use by SRA (and for consistency).

SRA needs to be conservative with its alignment assertions when
building MEM_REFs.  The following patch changes the single place
where we build them.  Together with the posted patch to fix
expanding of stores to unaligned accesses wrapped inside component-refs
this fixes this PR.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

I'm queuing these patches to apply them in a useful order, but consider
this one quite obvious.  A followup (or rather a patch that I'd apply
before) will be to reduce the number of non-scalarized loads/stores
SRA rewrites (which it ideally should not do at all).

Richard.

2012-01-24  Richard Guenther  <rguenther@suse.de>

	PR tree-optimization/50444
	* tree.h (get_pointer_alignment_1): Declare.
	* builtins.c (get_pointer_alignment_1): New function.
	(get_pointer_alignment): Use it.
	* tree-sra.c (build_ref_for_offset): Properly adjust the
	MEM_REF type for unaligned accesses.

	* gcc.dg/torture/pr50444.c: New testcase.

Index: gcc/tree.h
===================================================================
*** gcc/tree.h	(revision 183470)
--- gcc/tree.h	(working copy)
*************** extern bool is_builtin_fn (tree);
*** 5450,5455 ****
--- 5450,5456 ----
  extern unsigned int get_object_alignment_1 (tree, unsigned HOST_WIDE_INT *);
  extern unsigned int get_object_alignment (tree);
  extern unsigned int get_object_or_type_alignment (tree);
+ extern unsigned int get_pointer_alignment_1 (tree, unsigned HOST_WIDE_INT *);
  extern unsigned int get_pointer_alignment (tree);
  extern tree fold_call_stmt (gimple, bool);
  extern tree gimple_fold_builtin_snprintf_chk (gimple, tree, enum built_in_function);
Index: gcc/builtins.c
===================================================================
*** gcc/builtins.c	(revision 183470)
--- gcc/builtins.c	(working copy)
*************** get_object_or_type_alignment (tree exp)
*** 477,513 ****
    return align;
  }
  
! /* Return the alignment in bits of EXP, a pointer valued expression.
!    The alignment returned is, by default, the alignment of the thing that
!    EXP points to.  If it is not a POINTER_TYPE, 0 is returned.
  
!    Otherwise, look at the expression to see if we can do better, i.e., if the
!    expression is actually pointing at an object whose alignment is tighter.  */
  
  unsigned int
! get_pointer_alignment (tree exp)
  {
    STRIP_NOPS (exp);
  
    if (TREE_CODE (exp) == ADDR_EXPR)
!     return get_object_alignment (TREE_OPERAND (exp, 0));
    else if (TREE_CODE (exp) == SSA_NAME
  	   && POINTER_TYPE_P (TREE_TYPE (exp)))
      {
        struct ptr_info_def *pi = SSA_NAME_PTR_INFO (exp);
-       unsigned align;
        if (!pi)
! 	return BITS_PER_UNIT;
!       if (pi->misalign != 0)
! 	align = (pi->misalign & -pi->misalign);
!       else
! 	align = pi->align;
!       return align * BITS_PER_UNIT;
      }
  
    return POINTER_TYPE_P (TREE_TYPE (exp)) ? BITS_PER_UNIT : 0;
  }
  
  /* Compute the length of a C string.  TREE_STRING_LENGTH is not the right
     way, because it could contain a zero byte in the middle.
     TREE_STRING_LENGTH is the size of the character array, not the string.
--- 477,535 ----
    return align;
  }
  
! /* For a pointer valued expression EXP compute values M and N such that
!    M divides (EXP - N) and such that N < M.  Store N in *BITPOSP and return M.
  
!    If EXP is not a pointer, 0 is returned.  */
  
  unsigned int
! get_pointer_alignment_1 (tree exp, unsigned HOST_WIDE_INT *bitposp)
  {
    STRIP_NOPS (exp);
  
    if (TREE_CODE (exp) == ADDR_EXPR)
!     return get_object_alignment_1 (TREE_OPERAND (exp, 0), bitposp);
    else if (TREE_CODE (exp) == SSA_NAME
  	   && POINTER_TYPE_P (TREE_TYPE (exp)))
      {
        struct ptr_info_def *pi = SSA_NAME_PTR_INFO (exp);
        if (!pi)
! 	{
! 	  *bitposp = 0;
! 	  return BITS_PER_UNIT;
! 	}
!       *bitposp = pi->misalign * BITS_PER_UNIT;
!       return pi->align * BITS_PER_UNIT;
      }
  
+   *bitposp = 0;
    return POINTER_TYPE_P (TREE_TYPE (exp)) ? BITS_PER_UNIT : 0;
  }
  
+ /* Return the alignment in bits of EXP, a pointer valued expression.
+    The alignment returned is, by default, the alignment of the thing that
+    EXP points to.  If it is not a POINTER_TYPE, 0 is returned.
+ 
+    Otherwise, look at the expression to see if we can do better, i.e., if the
+    expression is actually pointing at an object whose alignment is tighter.  */
+ 
+ unsigned int
+ get_pointer_alignment (tree exp)
+ {
+   unsigned HOST_WIDE_INT bitpos = 0;
+   unsigned int align;
+   
+   align = get_pointer_alignment_1 (exp, &bitpos);
+ 
+   /* align and bitpos now specify known low bits of the pointer.
+      ptr & (align - 1) == bitpos.  */
+ 
+   if (bitpos != 0)
+     align = (bitpos & -bitpos);
+ 
+   return align;
+ }
+ 
  /* Compute the length of a C string.  TREE_STRING_LENGTH is not the right
     way, because it could contain a zero byte in the middle.
     TREE_STRING_LENGTH is the size of the character array, not the string.
Index: gcc/tree-sra.c
===================================================================
*** gcc/tree-sra.c	(revision 183470)
--- gcc/tree-sra.c	(working copy)
*************** build_ref_for_offset (location_t loc, tr
*** 1461,1466 ****
--- 1461,1468 ----
    tree prev_base = base;
    tree off;
    HOST_WIDE_INT base_offset;
+   unsigned HOST_WIDE_INT misalign;
+   unsigned int align;
  
    gcc_checking_assert (offset % BITS_PER_UNIT == 0);
  
*************** build_ref_for_offset (location_t loc, tr
*** 1506,1511 ****
--- 1508,1530 ----
        base = build_fold_addr_expr (unshare_expr (base));
      }
  
+   /* If prev_base were always an originally performed access
+      we can extract more optimistic alignment information
+      by looking at the access mode.  That would constrain the
+      alignment of base + base_offset which we would need to
+      adjust according to offset.
+      ???  But it is not at all clear that prev_base is an access
+      that was in the IL that way, so be conservative for now.  */
+   align = get_pointer_alignment_1 (base, &misalign);
+   misalign += (double_int_sext (tree_to_double_int (off),
+ 				TYPE_PRECISION (TREE_TYPE (off))).low
+ 	       * BITS_PER_UNIT);
+   misalign = misalign & (align - 1);
+   if (misalign != 0)
+     align = (misalign & -misalign);
+   if (align < TYPE_ALIGN (exp_type))
+     exp_type = build_aligned_type (exp_type, align);
+ 
    return fold_build2_loc (loc, MEM_REF, exp_type, base, off);
  }
  
Index: gcc/testsuite/gcc.dg/torture/pr50444.c
===================================================================
*** gcc/testsuite/gcc.dg/torture/pr50444.c	(revision 0)
--- gcc/testsuite/gcc.dg/torture/pr50444.c	(revision 0)
***************
*** 0 ****
--- 1,76 ----
+ /* { dg-require-effective-target sse2_runtime } */
+ /* { dg-do run } */
+ /* { dg-options "-msse2" } */
+ 
+ typedef long long __m128i __attribute__ ((__vector_size__ (16),
+ __may_alias__));
+ typedef int __v4si __attribute__ ((__vector_size__ (16)));
+ typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+ typedef unsigned int uint32_t;
+ 
+ typedef struct {
+     uint32_t v[4];
+ } a4x32;
+ 
+ a4x32* incr(a4x32* x)
+ {
+   x->v[0] += 1;
+   return x;
+ }
+ 
+ typedef struct {
+     __m128i m;
+ } a1xm128i;
+ 
+ static inline  a1xm128i ssefunc( a1xm128i in,  a1xm128i k)
+ {
+   a1xm128i ret;
+   ret.m = (__m128i)__builtin_ia32_pxor128 ((__v2di)in.m, (__v2di)k.m);
+   return ret;
+ }
+ 
+ static  a4x32  caster( a4x32 c4x32,  a1xm128i k)
+ {
+   a1xm128i c1x128;
+   if( sizeof(c4x32) != sizeof(c1x128) ) __builtin_abort();
+   __builtin_memcpy(&c1x128, &c4x32, sizeof(c1x128));
+   c1x128 = ssefunc(c1x128, k);
+   __builtin_memcpy(&c4x32, &c1x128, sizeof(c4x32));
+   return c4x32;
+ }
+ 
+ typedef struct  {
+     a1xm128i key;
+     a4x32 c;
+     __SIZE_TYPE__ elem;
+     a4x32 v;
+ } Engine;
+ 
+ void ctor(Engine *e)
+ {
+   e->elem = 0;
+   e->key.m = (__m128i)(__v4si){ 0, 0, 0, 0 };
+   e->c.v[0] = 0;
+   e->c.v[1] = 0;
+   e->c.v[2] = 0;
+   e->c.v[3] = 0;
+ }
+ 
+ uint32_t method( Engine *e)
+ {
+   if( e->elem == 0 )
+     {
+       e->v = caster(*incr(&e->c), e->key);
+       e->elem = 4;
+     }
+   return e->v.v[--e->elem];
+ }
+ 
+ int main()
+ {
+   Engine e4; ctor(&e4);
+   Engine e5; ctor(&e5);
+   if(method(&e4)!=method(&e5))
+     __builtin_abort ();
+   return 0;
+ }



More information about the Gcc-patches mailing list