This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug middle-end/57748] [4.7/4.8/4.9 Regression] ICE when expanding assignment to unaligned zero-sized array
- From: "bernd.edlinger at hotmail dot de" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: Wed, 04 Sep 2013 10:17:34 +0000
- Subject: [Bug middle-end/57748] [4.7/4.8/4.9 Regression] ICE when expanding assignment to unaligned zero-sized array
- Auto-submitted: auto-generated
- References: <bug-57748-4 at http dot gcc dot gnu dot org/bugzilla/>
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=57748
--- Comment #31 from Bernd Edlinger <bernd.edlinger at hotmail dot de> ---
(In reply to Richard Biener from comment #29)
> (In reply to Bernd Edlinger from comment #28)
> > (In reply to Richard Biener from comment #19)
> > > Barking up wrong trees. Hacky fix looks like:
> > >
> > > Index: gcc/expr.c
> > > ===================================================================
> > > --- gcc/expr.c (revision 202043)
> > > +++ gcc/expr.c (working copy)
> > > @@ -4753,6 +4753,9 @@ expand_assignment (tree to, tree from, b
> > > {
> > > enum machine_mode address_mode;
> > > rtx offset_rtx;
> > > + rtx saved_to_rtx = to_rtx;
> > > + if (misalignp)
> > > + to_rtx = mem;
> > >
> > > if (!MEM_P (to_rtx))
> > > {
> > > @@ -4785,6 +4788,11 @@ expand_assignment (tree to, tree from, b
> > > to_rtx = offset_address (to_rtx, offset_rtx,
> > > highest_pow2_factor_for_target (to,
> > > offset));
> > > + if (misalignp)
> > > + {
> > > + mem = to_rtx;
> > > + to_rtx = saved_to_rtx;
> > > + }
> > > }
> > >
> > > /* No action is needed if the target is not a memory and the field
> > >
> > >
> >
> > this patch generates wrong code too:
> >
> > foo:
> > .LFB9:
> > .cfi_startproc
> > pushq %rbx
> > .cfi_def_cfa_offset 16
> > .cfi_offset 3, -16
> > movq %rdi, %rbx
> > subq $16, %rsp
> > .cfi_def_cfa_offset 32
> > call get_i
> > movdqu (%rbx), %xmm0
> > cltq
> > movq .LC1(%rip), %xmm1
> > psrldq $8, %xmm0
> > punpcklqdq %xmm0, %xmm1
> > movdqu %xmm1, 16(%rbx,%rax,8)
> > addq $16, %rsp
> > .cfi_def_cfa_offset 16
> > popq %rbx
> > .cfi_def_cfa_offset 8
> > ret
> > .cfi_endproc
> >
> > loads *s into xmm0, modifies low part,
> > writes back at s->b[0] and s->b[1].
>
> This bug is latent because we use the mode of the base object to query
> for movmisalign_optab (and use it). It highlights the issue I raised
> in my last comment.
>
> So, new, completely untested patch:
>
> Index: gcc/expr.c
> ===================================================================
> --- gcc/expr.c (revision 202240)
> +++ gcc/expr.c (working copy)
> @@ -4646,10 +4646,12 @@ expand_assignment (tree to, tree from, b
>
> /* Handle misaligned stores. */
> mode = TYPE_MODE (TREE_TYPE (to));
> - if ((TREE_CODE (to) == MEM_REF
> - || TREE_CODE (to) == TARGET_MEM_REF)
> - && mode != BLKmode
> - && !mem_ref_refers_to_non_mem_p (to)
> + if (mode != BLKmode
> + && (DECL_P (to)
> + || handled_component_p (to)
> + || ((TREE_CODE (to) == MEM_REF
> + || TREE_CODE (to) == TARGET_MEM_REF)
> + && !mem_ref_refers_to_non_mem_p (to)))
> && ((align = get_object_alignment (to))
> < GET_MODE_ALIGNMENT (mode))
> && (((icode = optab_handler (movmisalign_optab, mode))
> @@ -4696,7 +4698,6 @@ expand_assignment (tree to, tree from, b
> int unsignedp;
> int volatilep = 0;
> tree tem;
> - bool misalignp;
> rtx mem = NULL_RTX;
>
> push_temp_slots ();
> @@ -4707,40 +4708,7 @@ expand_assignment (tree to, tree from, b
> && DECL_BIT_FIELD_TYPE (TREE_OPERAND (to, 1)))
> get_bit_range (&bitregion_start, &bitregion_end, to, &bitpos,
> &offset);
>
> - /* If we are going to use store_bit_field and extract_bit_field,
> - make sure to_rtx will be safe for multiple use. */
> - mode = TYPE_MODE (TREE_TYPE (tem));
> - if (TREE_CODE (tem) == MEM_REF
> - && mode != BLKmode
> - && ((align = get_object_alignment (tem))
> - < GET_MODE_ALIGNMENT (mode))
> - && ((icode = optab_handler (movmisalign_optab, mode))
> - != CODE_FOR_nothing))
> - {
> - struct expand_operand ops[2];
> -
> - misalignp = true;
> - to_rtx = gen_reg_rtx (mode);
> - mem = expand_expr (tem, NULL_RTX, VOIDmode, EXPAND_WRITE);
> -
> - /* If the misaligned store doesn't overwrite all bits, perform
> - rmw cycle on MEM. */
> - if (bitsize != GET_MODE_BITSIZE (mode))
> - {
> - create_input_operand (&ops[0], to_rtx, mode);
> - create_fixed_operand (&ops[1], mem);
> - /* The movmisalign<mode> pattern cannot fail, else the
> assignment
> - would silently be omitted. */
> - expand_insn (icode, 2, ops);
> -
> - mem = copy_rtx (mem);
> - }
> - }
> - else
> - {
> - misalignp = false;
> - to_rtx = expand_expr (tem, NULL_RTX, VOIDmode, EXPAND_WRITE);
> - }
> + to_rtx = expand_expr (tem, NULL_RTX, VOIDmode, EXPAND_WRITE);
>
> /* If the bitfield is volatile, we want to access it in the
> field's mode, not the computed mode.
> @@ -4879,17 +4847,6 @@ expand_assignment (tree to, tree from, b
> get_alias_set (to), nontemporal);
> }
>
> - if (misalignp)
> - {
> - struct expand_operand ops[2];
> -
> - create_fixed_operand (&ops[0], mem);
> - create_input_operand (&ops[1], to_rtx, mode);
> - /* The movmisalign<mode> pattern cannot fail, else the assignment
> - would silently be omitted. */
> - expand_insn (icode, 2, ops);
> - }
> -
> if (result)
> preserve_temp_slots (result);
> pop_temp_slots ();
Richard, this will break the bitregion_start/end handling.
IMHO the misalign path is just a very old performance improvement.
It is totally under the control of the target hooks,
if that is executed.
It is only meant for structures that fit into registers,
like:
struct
{
doulbe: x;
};
=> mode = double
or
union
{
double x;
float y[2];
};
=> mode = int64
when it is dangerous to go there we do not have to.
I am beginning to think that my patch is right.