This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] Fix small structure passing on x86-64
- From: Jan Hubicka <hubicka at ucw dot cz>
- To: Eric Botcazou <ebotcazou at adacore dot com>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Wed, 12 Nov 2008 17:35:03 +0100
- Subject: Re: [PATCH] Fix small structure passing on x86-64
- References: <200810311146.58286.ebotcazou@adacore.com>
Hi,
here is promised patch. It shoud fix several problems on passing
structures:
In
struct S { char c; char arr[3]; } s;
main()
{
t(s);
}
we should emit 32bit read, not 64bit that might cause segfault if s ends up on the end of page.
In
struct S { char c; char arr[3]; double f } s;
main()
{
t(s);
}
we should emit 32bit read followed by 64bit SSE read. This is only performance not correcntess issue,
but it is related. This is fixed by the i386.c change.
In
#include <assert.h>
struct S
{
char arr[3];
} s =
{
{
1, 2, 3}};
__attribute__ ((noinline))
void t (struct S s)
{
assert (s.arr[0] == 1 && s.arr[1] == 2 && s.arr[2] == 3);
}
main ()
{
t (s);
return 0;
}
Should read exactly 3 bytes and pack it into register since s might appear just at the end of page.
struct S
{
char c;
char arr[10];
} s;
main ()
{
t (s);
}
Should be similar to above with one extra 64bit read in the front.
These seems to be all wrong in all GCC versions on x86-64. I've updated
move_block_to_reg to tage size in bytes, not in the number of words. It
uses partial move at the end of block if there is appropriate mode or
bitfield extraction otherwise.
I hope I did not introduced any endianity issues, but I am not sure if there is
big endian machine that cares.
Seems sane?
Honza
* expr.c (move_block_to_reg): Take block size in bytes; use
partial move on the end of block to not read past end of buffer.
(emit_push_insn): Update call of move_block_to_reg.
* calls.c (load_register_parameters): Update call of move_block_to_reg.
* i386.c (classify_argument): Fix code promoting partial to full classes.
Index: expr.c
===================================================================
*** expr.c (revision 141617)
--- expr.c (working copy)
*************** emit_block_move_via_loop (rtx x, rtx y,
*** 1523,1543 ****
}
/* Copy all or part of a value X into registers starting at REGNO.
! The number of registers to be filled is NREGS. */
void
! move_block_to_reg (int regno, rtx x, int nregs, enum machine_mode mode)
{
! int i;
#ifdef HAVE_load_multiple
rtx pat;
rtx last;
#endif
! if (nregs == 0)
return;
! if (CONSTANT_P (x) && ! LEGITIMATE_CONSTANT_P (x))
x = validize_mem (force_const_mem (mode, x));
/* See if the machine can do this with a load multiple insn. */
--- 1523,1544 ----
}
/* Copy all or part of a value X into registers starting at REGNO.
! The number of registers to be filled is BYTES/UNITS_PER_WORD. */
void
! move_block_to_reg (int regno, rtx x, int bytes, enum machine_mode mode)
{
! int offset;
! enum machine_mode mode2 = word_mode;
#ifdef HAVE_load_multiple
rtx pat;
rtx last;
#endif
! if (bytes == 0)
return;
! if (CONSTANT_P (x) && !LEGITIMATE_CONSTANT_P (x))
x = validize_mem (force_const_mem (mode, x));
/* See if the machine can do this with a load multiple insn. */
*************** move_block_to_reg (int regno, rtx x, int
*** 1557,1565 ****
}
#endif
! for (i = 0; i < nregs; i++)
! emit_move_insn (gen_rtx_REG (word_mode, regno + i),
! operand_subword_force (x, i, mode));
}
/* Copy all or part of a BLKmode value X out of registers starting at REGNO.
--- 1558,1616 ----
}
#endif
! for (offset = 0; offset < bytes; offset += UNITS_PER_WORD)
! {
! rtx reg;
! reg = gen_rtx_REG (word_mode, regno + offset / UNITS_PER_WORD);
!
! /* Do not read past end of memory block. */
! if (bytes - offset < UNITS_PER_WORD
! && MEM_P (x))
! {
! mode2 =
! smallest_mode_for_size ((bytes - offset) * UNITS_PER_WORD,
! MODE_INT);
! if (GET_MODE_UNIT_SIZE (mode2) != bytes - offset)
! {
! rtx dst;
! dst =
! extract_bit_field (x, (bytes - offset) * BITS_PER_UNIT,
! offset * BITS_PER_UNIT, 1,
! gen_rtx_REG (word_mode,
! regno +
! offset / UNITS_PER_WORD),
! word_mode, word_mode);
! if (BYTES_BIG_ENDIAN)
! dst = expand_shift (RSHIFT_EXPR, word_mode, dst,
! build_int_cst (NULL_TREE,
! (UNITS_PER_WORD - bytes +
! offset) * BITS_PER_UNIT),
! dst, 0);
!
! if (reg != dst)
! emit_move_insn (reg, dst);
! break;
! }
! }
! emit_move_insn (gen_lowpart (mode2, reg),
! simplify_gen_subreg (mode2,
! operand_subword_force (x,
! offset /
! UNITS_PER_WORD,
! mode),
! word_mode, 0));
! if (mode2 != word_mode && BYTES_BIG_ENDIAN)
! {
! rtx dst;
! dst = expand_shift (RSHIFT_EXPR, word_mode, reg,
! build_int_cst (NULL_TREE,
! (UNITS_PER_WORD - bytes +
! offset) * BITS_PER_UNIT), reg,
! 0);
! if (reg != dst)
! emit_move_insn (reg, dst);
! }
! }
}
/* Copy all or part of a BLKmode value X out of registers starting at REGNO.
*************** emit_push_insn (rtx x, enum machine_mode
*** 3981,3987 ****
else
{
gcc_assert (partial % UNITS_PER_WORD == 0);
! move_block_to_reg (REGNO (reg), x, partial / UNITS_PER_WORD, mode);
}
}
--- 4032,4038 ----
else
{
gcc_assert (partial % UNITS_PER_WORD == 0);
! move_block_to_reg (REGNO (reg), x, partial, mode);
}
}
Index: calls.c
===================================================================
*** calls.c (revision 141617)
--- calls.c (working copy)
*************** load_register_parameters (struct arg_dat
*** 1592,1597 ****
--- 1592,1598 ----
{
int partial = args[i].partial;
int nregs;
+ int bytes;
int size = 0;
rtx before_arg = get_last_insn ();
/* Set non-negative if we must move a word at a time, even if
*************** load_register_parameters (struct arg_dat
*** 1599,1615 ****
--- 1600,1619 ----
to -1 if we just use a normal move insn. This value can be
zero if the argument is a zero size structure. */
nregs = -1;
+ bytes = -1;
if (GET_CODE (reg) == PARALLEL)
;
else if (partial)
{
gcc_assert (partial % UNITS_PER_WORD == 0);
nregs = partial / UNITS_PER_WORD;
+ bytes = partial;
}
else if (TYPE_MODE (TREE_TYPE (args[i].tree_value)) == BLKmode)
{
size = int_size_in_bytes (TREE_TYPE (args[i].tree_value));
nregs = (size + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD;
+ bytes = size;
}
else
size = GET_MODE_SIZE (args[i].mode);
*************** load_register_parameters (struct arg_dat
*** 1694,1700 ****
emit_move_insn (ri, x);
}
else
! move_block_to_reg (REGNO (reg), mem, nregs, args[i].mode);
}
/* When a parameter is a block, and perhaps in other cases, it is
--- 1698,1704 ----
emit_move_insn (ri, x);
}
else
! move_block_to_reg (REGNO (reg), mem, bytes, args[i].mode);
}
/* When a parameter is a block, and perhaps in other cases, it is
Index: config/i386/i386.c
===================================================================
*** config/i386/i386.c (revision 141617)
--- config/i386/i386.c (working copy)
*************** classify_argument (enum machine_mode mod
*** 4928,4937 ****
return 0;
/* The partial classes are now full classes. */
! if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
subclasses[0] = X86_64_SSE_CLASS;
if (subclasses[0] == X86_64_INTEGERSI_CLASS
! && !((bit_offset % 64) == 0 && bytes == 4))
subclasses[0] = X86_64_INTEGER_CLASS;
for (i = 0; i < words; i++)
--- 4928,4938 ----
return 0;
/* The partial classes are now full classes. */
! if (subclasses[0] == X86_64_SSESF_CLASS
! && (bit_offset + 7) / 8 + bytes > 4)
subclasses[0] = X86_64_SSE_CLASS;
if (subclasses[0] == X86_64_INTEGERSI_CLASS
! && (bit_offset + 7) / 8 + bytes > 4)
subclasses[0] = X86_64_INTEGER_CLASS;
for (i = 0; i < words; i++)