This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [RFC PATCH] fold one element {mem{set,cpy,pcpy,move},b{zero,copy}} (PR middle-end/27567)


On 9/28/06, Richard Guenther <richard.guenther@gmail.com> wrote:
On 9/1/06, Jakub Jelinek <jakub@redhat.com> wrote:
> On Thu, Aug 31, 2006 at 06:04:37AM -0600, Roger Sayle wrote:
> > This is OK for mainline, provided its been boostrapped and regression
> > tested somewhere, as PR middle-end/27567 is a P2 regression affecting
> > all open branches.  However, as it's a bit intrusive, and I'm cautious
> > of corner cases or potential interactions on STRICT_ALIGNMENT targets,
> > let's leave it a week or two on mainline to confirm there are no problems
> > before backporting to 4.1 and 4.0.
>
> While it seemed to generate correct code even for unaligned
> memcpy/memset/etc. arguments, at least on STRICT_ALIGNMENT arches the
> generated code was different after the patch, but not always better
> and even for !STRICT_ALIGNMENT arches I'm not 100% sure if direct
> assignment is always what would be used by
> emit_block_move/clear_storage.  So at least for now I'd prefer to only
> do this for aligned memory and keep unaligned memcpy/memset/...
> until RTL expansion.  Are you ok with that change?
>
> > If you could spin this patch on your build farm, the wide number of
> > architectures covered would go a long way to easing my concerns.
>
> The attached 4.2 patch was bootstrapped/regtested on i686-linux and
> regtested on x86_64-linux, the 4.1 patch (which is except for whitespace
> identical) has been bootstrapped/regtested on
> {i386,x86_64,ia64,ppc,ppc64,s390,s390x}-linux.

This patch causes tramp3d-v4 and DLV to be miscompiled on x86_64 (-O2
-funroll-loops).
Reverting it fixes the miscompilations, r116641 is unrelated
(reverting that doesn't fix
the failures).

The segfault is from

(gdb) run
Starting program: /abuild/rguenther/tramp3d-v4 -n 1

Program received signal SIGSEGV, Segmentation fault.
DomainMapNode<Interval<1>, int>::insert (this=0x70f200, v=@0x7fbfffbc10)
    at tramp3d-v4.cpp:5062
5062          memcpy(&head_m, &p->next_m, sizeof(head_m));

which is from some custom allocator which does pointer copying via
memcpy (yuck):

class Pool
{
...
  inline void* alloc()
    {
      outstandingAllocs_m += 1;
      if ( head_m==0 )
 grow();
      Link *p = head_m;
      memcpy(&head_m, &p->next_m, sizeof(head_m));
      return p;
    }
...
private:
  struct Link { Link *next_m; };
  void __attribute__((noinline)) grow();
  Link *head_m;
...
  std::vector<char*> chunks_m;
}

void Pool::grow()
{
...
  char *start = new char[alloc_this];
  chunks_m.push_back(start);
  char *last = start + (nblock_m-1)*bsize_m;
  for (char *p=start; p!=last; p+=bsize_m)
    ((Link*)p)->next_m = (Link*)(p+bsize_m);
  ((Link*)last)->next_m = head_m;
  head_m = (Link*)start;
}

I think this may be removing the memcpy causes aliasing issues to pop up.
Note that with memcpy optimized we see *head_m read as Link* and further
down written/read as the type allocated:

  #   VUSE <SMT.10022_1130>;
  p_439 = pool_s.head_m;
  #   VUSE <SMT.10022_1130>;
  D.292143_440 = p_439->next_m;
  #   SMT.10022_1165 = V_MAY_DEF <SMT.10022_1130>;
  pool_s.head_m = D.292143_440;
  D.267536_442 = p_439;
  this_446 = (struct DomainMapNode<Interval<1>,int> *) D.267536_442;
  #   VUSE <SMT.10018_1121>;
  D.292211_489 = leftdom.D.49893.D.49707.domain_m[0];
  #   SMT.10018_1166 = V_MAY_DEF <SMT.10018_1121>;
  this_446->domain_m.D.49893.D.49707.domain_m[0] = D.292211_489;

note how *p_439 is assigned SMT.10022 and SMT.10018.

But as memcpy is required to work aliasing-unaware (i.e. it is the proper way
of doing type-punning), optimizing memcpy to an assignment may be dangerous.

(http://www.suse.de/~rguenther/tramp3d-v4-small.cpp contains a reduced (parts of
main are #if 0'ed) testcase that segfaults with -O2 -funroll-loops on x86_64)

Here's a testcase that results in wrong alias information:


struct Foo {
 int a;
 int b;
};
struct Node;
struct Node {
 struct Node *next;
};
struct Node *pool;
void grow (void)
{
 char *mem = __builtin_malloc (4096);
 struct Node *node = (struct Node *)mem;
 struct Foo *foo;
 __builtin_memcpy (&node->next, &pool, sizeof(struct Node*));
 pool = node;
}
static inline void *alloc_one (void)
{
 struct Node *node = pool;
 __builtin_memcpy (&pool, &node->next, sizeof(struct Node*));
 return node;
}
static inline void dealloc_one (void *p)
{
 struct Node *node = p;
 __builtin_memcpy (&node->next, &pool, sizeof(struct Node*));
 pool = node;
}
void bar(void)
{
 grow();
 struct Foo* foo = alloc_one();
 foo->a = 0;
 foo->b = -1;
 dealloc_one (foo);
 foo = alloc_one();
 foo->a = 0;
 foo->b = -1;
 dealloc_one (foo);
 grow();
}

You can see in .alias1 that

 #   SMT.33_25 = V_MAY_DEF <SMT.33_24>;
 foo_7->a = 0;
 #   SMT.33_26 = V_MAY_DEF <SMT.33_25>;
 foo_7->b = -1;
 p_8 = foo_7;
 node_9 = (struct Node *) p_8;
 #   VUSE <pool_4>;
 pool.1_10 = pool;
 #   pool_27 = V_MAY_DEF <pool_4>;
 node_9->next = pool.1_10;

the last statement needs to clobber SMT.33 here.  With memcpy retained here
we get the correct

 #   SMT.33_25 = V_MAY_DEF <SMT.33_24>;
 foo_6->a = 0;
 #   SMT.33_26 = V_MAY_DEF <SMT.33_25>;
 foo_6->b = -1;
 p_7 = foo_6;
 node_8 = (struct Node *) p_7;
 D.1936_9 = &node_8->next;
 #   pool_27 = V_MAY_DEF <pool_23>;
 #   SMT.33_28 = V_MAY_DEF <SMT.33_26>;
 memfoo (D.1936_9, &pool, 8);

Richard.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]