[PATCH][2/n] Fix PR50444, SRA and misaligned accesses
Richard Guenther
rguenther@suse.de
Tue Jan 24 13:55:00 GMT 2012
This creates a get_pointer_alignment_1 function, similar to
get_object_alignment_1 for use by SRA (and for consistency).
SRA needs to be conservative with its alignment assertions when
building MEM_REFs. The following patch changes the single place
where we build them. Together with the posted patch to fix
expanding of stores to unaligned accesses wrapped inside component-refs
this fixes this PR.
Bootstrapped and tested on x86_64-unknown-linux-gnu.
I'm queuing these patches to apply them in a useful order, but consider
this one quite obvious. A followup (or rather a patch that I'd apply
before) will be to reduce the number of non-scalarized loads/stores
SRA rewrites (which it ideally should not do at all).
Richard.
2012-01-24 Richard Guenther <rguenther@suse.de>
PR tree-optimization/50444
* tree.h (get_pointer_alignment_1): Declare.
* builtins.c (get_pointer_alignment_1): New function.
(get_pointer_alignment): Use it.
* tree-sra.c (build_ref_for_offset): Properly adjust the
MEM_REF type for unaligned accesses.
* gcc.dg/torture/pr50444.c: New testcase.
Index: gcc/tree.h
===================================================================
*** gcc/tree.h (revision 183470)
--- gcc/tree.h (working copy)
*************** extern bool is_builtin_fn (tree);
*** 5450,5455 ****
--- 5450,5456 ----
extern unsigned int get_object_alignment_1 (tree, unsigned HOST_WIDE_INT *);
extern unsigned int get_object_alignment (tree);
extern unsigned int get_object_or_type_alignment (tree);
+ extern unsigned int get_pointer_alignment_1 (tree, unsigned HOST_WIDE_INT *);
extern unsigned int get_pointer_alignment (tree);
extern tree fold_call_stmt (gimple, bool);
extern tree gimple_fold_builtin_snprintf_chk (gimple, tree, enum built_in_function);
Index: gcc/builtins.c
===================================================================
*** gcc/builtins.c (revision 183470)
--- gcc/builtins.c (working copy)
*************** get_object_or_type_alignment (tree exp)
*** 477,513 ****
return align;
}
! /* Return the alignment in bits of EXP, a pointer valued expression.
! The alignment returned is, by default, the alignment of the thing that
! EXP points to. If it is not a POINTER_TYPE, 0 is returned.
! Otherwise, look at the expression to see if we can do better, i.e., if the
! expression is actually pointing at an object whose alignment is tighter. */
unsigned int
! get_pointer_alignment (tree exp)
{
STRIP_NOPS (exp);
if (TREE_CODE (exp) == ADDR_EXPR)
! return get_object_alignment (TREE_OPERAND (exp, 0));
else if (TREE_CODE (exp) == SSA_NAME
&& POINTER_TYPE_P (TREE_TYPE (exp)))
{
struct ptr_info_def *pi = SSA_NAME_PTR_INFO (exp);
- unsigned align;
if (!pi)
! return BITS_PER_UNIT;
! if (pi->misalign != 0)
! align = (pi->misalign & -pi->misalign);
! else
! align = pi->align;
! return align * BITS_PER_UNIT;
}
return POINTER_TYPE_P (TREE_TYPE (exp)) ? BITS_PER_UNIT : 0;
}
/* Compute the length of a C string. TREE_STRING_LENGTH is not the right
way, because it could contain a zero byte in the middle.
TREE_STRING_LENGTH is the size of the character array, not the string.
--- 477,535 ----
return align;
}
! /* For a pointer valued expression EXP compute values M and N such that
! M divides (EXP - N) and such that N < M. Store N in *BITPOSP and return M.
! If EXP is not a pointer, 0 is returned. */
unsigned int
! get_pointer_alignment_1 (tree exp, unsigned HOST_WIDE_INT *bitposp)
{
STRIP_NOPS (exp);
if (TREE_CODE (exp) == ADDR_EXPR)
! return get_object_alignment_1 (TREE_OPERAND (exp, 0), bitposp);
else if (TREE_CODE (exp) == SSA_NAME
&& POINTER_TYPE_P (TREE_TYPE (exp)))
{
struct ptr_info_def *pi = SSA_NAME_PTR_INFO (exp);
if (!pi)
! {
! *bitposp = 0;
! return BITS_PER_UNIT;
! }
! *bitposp = pi->misalign * BITS_PER_UNIT;
! return pi->align * BITS_PER_UNIT;
}
+ *bitposp = 0;
return POINTER_TYPE_P (TREE_TYPE (exp)) ? BITS_PER_UNIT : 0;
}
+ /* Return the alignment in bits of EXP, a pointer valued expression.
+ The alignment returned is, by default, the alignment of the thing that
+ EXP points to. If it is not a POINTER_TYPE, 0 is returned.
+
+ Otherwise, look at the expression to see if we can do better, i.e., if the
+ expression is actually pointing at an object whose alignment is tighter. */
+
+ unsigned int
+ get_pointer_alignment (tree exp)
+ {
+ unsigned HOST_WIDE_INT bitpos = 0;
+ unsigned int align;
+
+ align = get_pointer_alignment_1 (exp, &bitpos);
+
+ /* align and bitpos now specify known low bits of the pointer.
+ ptr & (align - 1) == bitpos. */
+
+ if (bitpos != 0)
+ align = (bitpos & -bitpos);
+
+ return align;
+ }
+
/* Compute the length of a C string. TREE_STRING_LENGTH is not the right
way, because it could contain a zero byte in the middle.
TREE_STRING_LENGTH is the size of the character array, not the string.
Index: gcc/tree-sra.c
===================================================================
*** gcc/tree-sra.c (revision 183470)
--- gcc/tree-sra.c (working copy)
*************** build_ref_for_offset (location_t loc, tr
*** 1461,1466 ****
--- 1461,1468 ----
tree prev_base = base;
tree off;
HOST_WIDE_INT base_offset;
+ unsigned HOST_WIDE_INT misalign;
+ unsigned int align;
gcc_checking_assert (offset % BITS_PER_UNIT == 0);
*************** build_ref_for_offset (location_t loc, tr
*** 1506,1511 ****
--- 1508,1530 ----
base = build_fold_addr_expr (unshare_expr (base));
}
+ /* If prev_base were always an originally performed access
+ we can extract more optimistic alignment information
+ by looking at the access mode. That would constrain the
+ alignment of base + base_offset which we would need to
+ adjust according to offset.
+ ??? But it is not at all clear that prev_base is an access
+ that was in the IL that way, so be conservative for now. */
+ align = get_pointer_alignment_1 (base, &misalign);
+ misalign += (double_int_sext (tree_to_double_int (off),
+ TYPE_PRECISION (TREE_TYPE (off))).low
+ * BITS_PER_UNIT);
+ misalign = misalign & (align - 1);
+ if (misalign != 0)
+ align = (misalign & -misalign);
+ if (align < TYPE_ALIGN (exp_type))
+ exp_type = build_aligned_type (exp_type, align);
+
return fold_build2_loc (loc, MEM_REF, exp_type, base, off);
}
Index: gcc/testsuite/gcc.dg/torture/pr50444.c
===================================================================
*** gcc/testsuite/gcc.dg/torture/pr50444.c (revision 0)
--- gcc/testsuite/gcc.dg/torture/pr50444.c (revision 0)
***************
*** 0 ****
--- 1,76 ----
+ /* { dg-require-effective-target sse2_runtime } */
+ /* { dg-do run } */
+ /* { dg-options "-msse2" } */
+
+ typedef long long __m128i __attribute__ ((__vector_size__ (16),
+ __may_alias__));
+ typedef int __v4si __attribute__ ((__vector_size__ (16)));
+ typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+ typedef unsigned int uint32_t;
+
+ typedef struct {
+ uint32_t v[4];
+ } a4x32;
+
+ a4x32* incr(a4x32* x)
+ {
+ x->v[0] += 1;
+ return x;
+ }
+
+ typedef struct {
+ __m128i m;
+ } a1xm128i;
+
+ static inline a1xm128i ssefunc( a1xm128i in, a1xm128i k)
+ {
+ a1xm128i ret;
+ ret.m = (__m128i)__builtin_ia32_pxor128 ((__v2di)in.m, (__v2di)k.m);
+ return ret;
+ }
+
+ static a4x32 caster( a4x32 c4x32, a1xm128i k)
+ {
+ a1xm128i c1x128;
+ if( sizeof(c4x32) != sizeof(c1x128) ) __builtin_abort();
+ __builtin_memcpy(&c1x128, &c4x32, sizeof(c1x128));
+ c1x128 = ssefunc(c1x128, k);
+ __builtin_memcpy(&c4x32, &c1x128, sizeof(c4x32));
+ return c4x32;
+ }
+
+ typedef struct {
+ a1xm128i key;
+ a4x32 c;
+ __SIZE_TYPE__ elem;
+ a4x32 v;
+ } Engine;
+
+ void ctor(Engine *e)
+ {
+ e->elem = 0;
+ e->key.m = (__m128i)(__v4si){ 0, 0, 0, 0 };
+ e->c.v[0] = 0;
+ e->c.v[1] = 0;
+ e->c.v[2] = 0;
+ e->c.v[3] = 0;
+ }
+
+ uint32_t method( Engine *e)
+ {
+ if( e->elem == 0 )
+ {
+ e->v = caster(*incr(&e->c), e->key);
+ e->elem = 4;
+ }
+ return e->v.v[--e->elem];
+ }
+
+ int main()
+ {
+ Engine e4; ctor(&e4);
+ Engine e5; ctor(&e5);
+ if(method(&e4)!=method(&e5))
+ __builtin_abort ();
+ return 0;
+ }
More information about the Gcc-patches
mailing list