This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Fix small structure passing on x86-64


Hi,
here is promised patch.  It shoud fix several problems on passing
structures:

In
struct S { char c; char arr[3];  } s;

main()
{
  t(s);
}

we should emit 32bit read, not 64bit that might cause segfault if s ends up on the end of page.

In
struct S { char c; char arr[3]; double f  } s;

main()
{
  t(s);
}
we should emit 32bit read followed by 64bit SSE read.  This is only performance not correcntess issue,
but it is related.  This is fixed by the i386.c change.

In
#include <assert.h>
struct S
{
  char arr[3];
} s =
{
  {
1, 2, 3}};

__attribute__ ((noinline))
     void t (struct S s)
{
  assert (s.arr[0] == 1 && s.arr[1] == 2 && s.arr[2] == 3);
}

main ()
{
  t (s);
  return 0;
}

Should read exactly 3 bytes and pack it into register since s might appear just at the end of page.

struct S
{
  char c;
  char arr[10];
} s;

main ()
{
  t (s);
}

Should be similar to above with one extra 64bit read in the front.

These seems to be all wrong in all GCC versions on x86-64.  I've updated
move_block_to_reg to tage size in bytes, not in the number of words.  It
uses partial move at the end of block if there is appropriate mode or
bitfield extraction otherwise.

I hope I did not introduced any endianity issues, but I am not sure if there is 
big endian machine that cares.

Seems sane?

Honza

	* expr.c (move_block_to_reg): Take block size in bytes; use
	partial move on the end of block to not read past end of buffer.
	(emit_push_insn): Update call of move_block_to_reg.
	* calls.c (load_register_parameters): Update call of move_block_to_reg.
	* i386.c (classify_argument): Fix code promoting partial to full classes.
Index: expr.c
===================================================================
*** expr.c	(revision 141617)
--- expr.c	(working copy)
*************** emit_block_move_via_loop (rtx x, rtx y, 
*** 1523,1543 ****
  }
  
  /* Copy all or part of a value X into registers starting at REGNO.
!    The number of registers to be filled is NREGS.  */
  
  void
! move_block_to_reg (int regno, rtx x, int nregs, enum machine_mode mode)
  {
!   int i;
  #ifdef HAVE_load_multiple
    rtx pat;
    rtx last;
  #endif
  
!   if (nregs == 0)
      return;
  
!   if (CONSTANT_P (x) && ! LEGITIMATE_CONSTANT_P (x))
      x = validize_mem (force_const_mem (mode, x));
  
    /* See if the machine can do this with a load multiple insn.  */
--- 1523,1544 ----
  }
  
  /* Copy all or part of a value X into registers starting at REGNO.
!    The number of registers to be filled is BYTES/UNITS_PER_WORD.  */
  
  void
! move_block_to_reg (int regno, rtx x, int bytes, enum machine_mode mode)
  {
!   int offset;
!   enum machine_mode mode2 = word_mode;
  #ifdef HAVE_load_multiple
    rtx pat;
    rtx last;
  #endif
  
!   if (bytes == 0)
      return;
  
!   if (CONSTANT_P (x) && !LEGITIMATE_CONSTANT_P (x))
      x = validize_mem (force_const_mem (mode, x));
  
    /* See if the machine can do this with a load multiple insn.  */
*************** move_block_to_reg (int regno, rtx x, int
*** 1557,1565 ****
      }
  #endif
  
!   for (i = 0; i < nregs; i++)
!     emit_move_insn (gen_rtx_REG (word_mode, regno + i),
! 		    operand_subword_force (x, i, mode));
  }
  
  /* Copy all or part of a BLKmode value X out of registers starting at REGNO.
--- 1558,1616 ----
      }
  #endif
  
!   for (offset = 0; offset < bytes; offset += UNITS_PER_WORD)
!     {
!       rtx reg;
!       reg = gen_rtx_REG (word_mode, regno + offset / UNITS_PER_WORD);
! 
!       /* Do not read past end of memory block.  */
!       if (bytes - offset < UNITS_PER_WORD
!           && MEM_P (x))
! 	{
! 	  mode2 =
! 	    smallest_mode_for_size ((bytes - offset) * UNITS_PER_WORD,
! 				    MODE_INT);
! 	  if (GET_MODE_UNIT_SIZE (mode2) != bytes - offset)
! 	    {
! 	      rtx dst;
! 	      dst =
! 		extract_bit_field (x, (bytes - offset) * BITS_PER_UNIT,
! 				   offset * BITS_PER_UNIT, 1,
! 				   gen_rtx_REG (word_mode,
! 						regno +
! 						offset / UNITS_PER_WORD),
! 				   word_mode, word_mode);
! 	      if (BYTES_BIG_ENDIAN)
! 		dst = expand_shift (RSHIFT_EXPR, word_mode, dst,
! 				    build_int_cst (NULL_TREE,
! 						   (UNITS_PER_WORD - bytes +
! 						    offset) * BITS_PER_UNIT),
! 				    dst, 0);
! 
! 	      if (reg != dst)
! 		emit_move_insn (reg, dst);
! 	      break;
! 	    }
! 	}
!       emit_move_insn (gen_lowpart (mode2, reg),
! 		      simplify_gen_subreg (mode2,
! 					   operand_subword_force (x,
! 								  offset /
! 								  UNITS_PER_WORD,
! 								  mode),
! 					   word_mode, 0));
!       if (mode2 != word_mode && BYTES_BIG_ENDIAN)
! 	{
! 	  rtx dst;
! 	  dst = expand_shift (RSHIFT_EXPR, word_mode, reg,
! 			      build_int_cst (NULL_TREE,
! 					     (UNITS_PER_WORD - bytes +
! 					      offset) * BITS_PER_UNIT), reg,
! 			      0);
! 	  if (reg != dst)
! 	    emit_move_insn (reg, dst);
! 	}
!     }
  }
  
  /* Copy all or part of a BLKmode value X out of registers starting at REGNO.
*************** emit_push_insn (rtx x, enum machine_mode
*** 3981,3987 ****
        else
  	{
  	  gcc_assert (partial % UNITS_PER_WORD == 0);
! 	  move_block_to_reg (REGNO (reg), x, partial / UNITS_PER_WORD, mode);
  	}
      }
  
--- 4032,4038 ----
        else
  	{
  	  gcc_assert (partial % UNITS_PER_WORD == 0);
! 	  move_block_to_reg (REGNO (reg), x, partial, mode);
  	}
      }
  
Index: calls.c
===================================================================
*** calls.c	(revision 141617)
--- calls.c	(working copy)
*************** load_register_parameters (struct arg_dat
*** 1592,1597 ****
--- 1592,1598 ----
  	{
  	  int partial = args[i].partial;
  	  int nregs;
+ 	  int bytes;
  	  int size = 0;
  	  rtx before_arg = get_last_insn ();
  	  /* Set non-negative if we must move a word at a time, even if
*************** load_register_parameters (struct arg_dat
*** 1599,1615 ****
--- 1600,1619 ----
  	     to -1 if we just use a normal move insn.  This value can be
  	     zero if the argument is a zero size structure.  */
  	  nregs = -1;
+ 	  bytes = -1;
  	  if (GET_CODE (reg) == PARALLEL)
  	    ;
  	  else if (partial)
  	    {
  	      gcc_assert (partial % UNITS_PER_WORD == 0);
  	      nregs = partial / UNITS_PER_WORD;
+ 	      bytes = partial;
  	    }
  	  else if (TYPE_MODE (TREE_TYPE (args[i].tree_value)) == BLKmode)
  	    {
  	      size = int_size_in_bytes (TREE_TYPE (args[i].tree_value));
  	      nregs = (size + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD;
+ 	      bytes = size;
  	    }
  	  else
  	    size = GET_MODE_SIZE (args[i].mode);
*************** load_register_parameters (struct arg_dat
*** 1694,1700 ****
  		    emit_move_insn (ri, x);
  		}
  	      else
! 		move_block_to_reg (REGNO (reg), mem, nregs, args[i].mode);
  	    }
  
  	  /* When a parameter is a block, and perhaps in other cases, it is
--- 1698,1704 ----
  		    emit_move_insn (ri, x);
  		}
  	      else
! 		move_block_to_reg (REGNO (reg), mem, bytes, args[i].mode);
  	    }
  
  	  /* When a parameter is a block, and perhaps in other cases, it is
Index: config/i386/i386.c
===================================================================
*** config/i386/i386.c	(revision 141617)
--- config/i386/i386.c	(working copy)
*************** classify_argument (enum machine_mode mod
*** 4928,4937 ****
  	      return 0;
  
  	    /* The partial classes are now full classes.  */
! 	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
  	      subclasses[0] = X86_64_SSE_CLASS;
  	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
! 		&& !((bit_offset % 64) == 0 && bytes == 4))
  	      subclasses[0] = X86_64_INTEGER_CLASS;
  
  	    for (i = 0; i < words; i++)
--- 4928,4938 ----
  	      return 0;
  
  	    /* The partial classes are now full classes.  */
! 	    if (subclasses[0] == X86_64_SSESF_CLASS
! 	        && (bit_offset + 7) / 8 + bytes > 4)
  	      subclasses[0] = X86_64_SSE_CLASS;
  	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
! 	        && (bit_offset + 7) / 8 + bytes > 4)
  	      subclasses[0] = X86_64_INTEGER_CLASS;
  
  	    for (i = 0; i < words; i++)


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]