This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: *_BY_PIECES_P flaw
>>>>> James E Wilson writes:
James> I think all of the uses of MOVE_MAX in expr.c are pointless, since we
James> will never consider anything bigger than MOVE_MAX_PIECES.
An unusual use of MOVE_MAX in expr.c is setting the strictness of
the alignment used for the moves. As you mentioned, move_by_pieces (and
clear_by_pieces) investigate the mode for the maximum size specified by
MOVE_MAX_PIECES. move_by_pieces_ninsns uses MOVE_MAX for the size. The
alignment of the move then potentially is overridden based on the natural
alignment of MOVE_MAX, i.e., MOVE_MAX * BITS_PER_UNIT.
The decision to override appears to be another flawed area of
expr.c.
if (! SLOW_UNALIGNED_ACCESS (word_mode, align)
|| align > MOVE_MAX * BITS_PER_UNIT || align >= BIGGEST_ALIGNMENT)
align = MOVE_MAX * BITS_PER_UNIT;
This tests SLOW_UNALIGNED_ACCESS using word_mode, which may not correspond
to the mode equivalent to MOVE_MAX or the mode chosen for the move. It
also assumes that the only two choices are MOVE_MAX natural alignment or
the current alignment instead of probing other modes for fast unaligned
access. This turns out to hurt 64-bit PowerPC a lot.
PowerPC doubleword loads and stores prefer at least word
alignment, but word loads and stores are more flexible. In 32-bit PowerPC
mode, the above code increases the alignment; but in 64-bit mode, it
remains unchanged. For example, a char array is cleared by words in
32-bit mode but cleared by bytes in 64-bit mode.
Overriding the alignment and then using GET_MODE_ALIGNMENT for the
move instruction seems like a dubious way to pick the decomposition size
for each chunk. I would have expected that the algorithms would choose
the largest usable mode based on the size of remaining data and the
alignment of the data in comparison to SLOW_UNALIGNED_ACCESS for the mode.
Other ports' definitions of SLOW_UNALIGNED_ACCESS do not depend on mode,
so the granularity mainly affects PowerPC.
The following patch modifies the existing alignment override
algorithm to choose the best alignment based on the largest effective mode
for the alignment. The patch uses MOVE_MAX to designate the size of the
move, but not the natural alignment. The alignment is set from the mode
chosen earlier in the function or word_mode. It's one way of addressing
some of the problems without completely rewriting the functions.
David
Index: expr.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/expr.c,v
retrieving revision 1.696
diff -c -p -r1.696 expr.c
*** expr.c 6 Aug 2004 10:40:29 -0000 1.696
--- expr.c 9 Aug 2004 01:41:04 -0000
*************** move_by_pieces (rtx to, rtx from, unsign
*** 921,929 ****
data.to_addr = copy_addr_to_reg (to_addr);
}
! if (! SLOW_UNALIGNED_ACCESS (word_mode, align)
! || align > MOVE_MAX * BITS_PER_UNIT || align >= BIGGEST_ALIGNMENT)
! align = MOVE_MAX * BITS_PER_UNIT;
/* First move what we can in the largest integer mode, then go to
successively smaller modes. */
--- 921,941 ----
data.to_addr = copy_addr_to_reg (to_addr);
}
! if (align > GET_MODE_ALIGNMENT (mode) || align >= BIGGEST_ALIGNMENT)
! align = GET_MODE_ALIGNMENT (mode);
! else
! {
! enum machine_mode xmode;
!
! for (tmode = GET_CLASS_NARROWEST_MODE (MODE_INT), xmode = tmode;
! tmode != VOIDmode;
! xmode = tmode, tmode = GET_MODE_WIDER_MODE (tmode))
! if (GET_MODE_SIZE (tmode) > MOVE_MAX
! || SLOW_UNALIGNED_ACCESS (tmode, align))
! break;
!
! align = MAX (align, GET_MODE_ALIGNMENT (xmode));
! }
/* First move what we can in the largest integer mode, then go to
successively smaller modes. */
*************** move_by_pieces_ninsns (unsigned HOST_WID
*** 989,997 ****
unsigned HOST_WIDE_INT n_insns = 0;
unsigned HOST_WIDE_INT max_size = MOVE_MAX + 1;
! if (! SLOW_UNALIGNED_ACCESS (word_mode, align)
! || align > MOVE_MAX * BITS_PER_UNIT || align >= BIGGEST_ALIGNMENT)
! align = MOVE_MAX * BITS_PER_UNIT;
while (max_size > 1)
{
--- 1001,1021 ----
unsigned HOST_WIDE_INT n_insns = 0;
unsigned HOST_WIDE_INT max_size = MOVE_MAX + 1;
! if (align > GET_MODE_ALIGNMENT (word_mode) || align >= BIGGEST_ALIGNMENT)
! align = GET_MODE_ALIGNMENT (word_mode);
! else
! {
! enum machine_mode tmode, xmode;
!
! for (tmode = GET_CLASS_NARROWEST_MODE (MODE_INT), xmode = tmode;
! tmode != VOIDmode;
! xmode = tmode, tmode = GET_MODE_WIDER_MODE (tmode))
! if (GET_MODE_SIZE (tmode) > MOVE_MAX
! || SLOW_UNALIGNED_ACCESS (tmode, align))
! break;
!
! align = MAX (align, GET_MODE_ALIGNMENT (xmode));
! }
while (max_size > 1)
{
*************** can_store_by_pieces (unsigned HOST_WIDE_
*** 1994,2002 ****
if (! STORE_BY_PIECES_P (len, align))
return 0;
! if (! SLOW_UNALIGNED_ACCESS (word_mode, align)
! || align > MOVE_MAX * BITS_PER_UNIT || align >= BIGGEST_ALIGNMENT)
! align = MOVE_MAX * BITS_PER_UNIT;
/* We would first store what we can in the largest integer mode, then go to
successively smaller modes. */
--- 2018,2038 ----
if (! STORE_BY_PIECES_P (len, align))
return 0;
! if (align > GET_MODE_ALIGNMENT (word_mode) || align >= BIGGEST_ALIGNMENT)
! align = GET_MODE_ALIGNMENT (word_mode);
! else
! {
! enum machine_mode xmode;
!
! for (tmode = GET_CLASS_NARROWEST_MODE (MODE_INT), xmode = tmode;
! tmode != VOIDmode;
! xmode = tmode, tmode = GET_MODE_WIDER_MODE (tmode))
! if (GET_MODE_SIZE (tmode) > MOVE_MAX
! || SLOW_UNALIGNED_ACCESS (tmode, align))
! break;
!
! align = MAX (align, GET_MODE_ALIGNMENT (xmode));
! }
/* We would first store what we can in the largest integer mode, then go to
successively smaller modes. */
*************** store_by_pieces_1 (struct store_by_piece
*** 2196,2204 ****
data->to_addr = copy_addr_to_reg (to_addr);
}
! if (! SLOW_UNALIGNED_ACCESS (word_mode, align)
! || align > MOVE_MAX * BITS_PER_UNIT || align >= BIGGEST_ALIGNMENT)
! align = MOVE_MAX * BITS_PER_UNIT;
/* First store what we can in the largest integer mode, then go to
successively smaller modes. */
--- 2232,2252 ----
data->to_addr = copy_addr_to_reg (to_addr);
}
! if (align > GET_MODE_ALIGNMENT (mode) || align >= BIGGEST_ALIGNMENT)
! align = GET_MODE_ALIGNMENT (mode);
! else
! {
! enum machine_mode xmode;
!
! for (tmode = GET_CLASS_NARROWEST_MODE (MODE_INT), xmode = tmode;
! tmode != VOIDmode;
! xmode = tmode, tmode = GET_MODE_WIDER_MODE (tmode))
! if (GET_MODE_SIZE (tmode) > MOVE_MAX
! || SLOW_UNALIGNED_ACCESS (tmode, align))
! break;
!
! align = MAX (align, GET_MODE_ALIGNMENT (xmode));
! }
/* First store what we can in the largest integer mode, then go to
successively smaller modes. */