This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Infrastructure for passing memcpy/memset profiles to backend


Hi,
this patch adds infrastructure to pass information about the expected size and
alignment down to the machine description.  This is done by extending
movmem/setmem with two additional arguments and, on the other end of the
interface, by adding "_hints" variants of clear_storage and emit_block_move.

This is to be used by profile feedback as discussed in a separate mail (I am
going to try to implement the code preserving interesting bits from histograms
up to expansion time to see if it works out better than my current memcpy_hints
builtin implementation, but this bit of infrastructure is definitely shared).

I will also send an x86 implementation that uses this.

Bootstrapped/regtested i686-linux, OK?
:ADDPATCH middle-end:

Honza

	* expr.c (emit_block_move_via_movmem, emit_block_move_via_libcall): Add
	variant handling histograms; add wrapper.
	(clear_storage_via_libcall): Export.
	(emit_block_move_hints): Break out from ...; add histograms.
	(emit_block_move): ... this one.
	(clear_storage_hints): Break out from ...; add histograms.
	(clear_storage): ... this one.
	(set_storage_via_setmem): Handle histogram.
	* expr.h (emit_block_move_via_libcall, emit_block_move_hints): Declare.
	(clear_storage_hints, clear_storage_via_libcall): Declare.
	(set_storage_via_setmem): Update prototype.
	* doc/md.texi (movmem, setmem): Document new arguments.
Index: builtins.c
===================================================================
*** builtins.c	(revision 118067)
--- builtins.c	(working copy)
*************** expand_builtin_memset (tree arglist, rtx
*** 3506,3512 ****
  			       builtin_memset_gen_str, val_rtx, dest_align, 0);
  	    }
  	  else if (!set_storage_via_setmem (dest_mem, len_rtx, val_rtx,
! 					    dest_align))
  	    goto do_libcall;
  
  	  dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
--- 3506,3512 ----
  			       builtin_memset_gen_str, val_rtx, dest_align, 0);
  	    }
  	  else if (!set_storage_via_setmem (dest_mem, len_rtx, val_rtx,
! 					    dest_align, 0, -1))
  	    goto do_libcall;
  
  	  dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
*************** expand_builtin_memset (tree arglist, rtx
*** 3526,3532 ****
  	    store_by_pieces (dest_mem, tree_low_cst (len, 1),
  			     builtin_memset_read_str, &c, dest_align, 0);
  	  else if (!set_storage_via_setmem (dest_mem, len_rtx, GEN_INT (c),
! 					    dest_align))
  	    goto do_libcall;
  
  	  dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
--- 3526,3532 ----
  	    store_by_pieces (dest_mem, tree_low_cst (len, 1),
  			     builtin_memset_read_str, &c, dest_align, 0);
  	  else if (!set_storage_via_setmem (dest_mem, len_rtx, GEN_INT (c),
! 					    dest_align, 0, -1))
  	    goto do_libcall;
  
  	  dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
Index: expr.c
===================================================================
*** expr.c	(revision 118067)
--- expr.c	(working copy)
*************** static unsigned HOST_WIDE_INT move_by_pi
*** 126,133 ****
  static void move_by_pieces_1 (rtx (*) (rtx, ...), enum machine_mode,
  			      struct move_by_pieces *);
  static bool block_move_libcall_safe_for_call_parm (void);
! static bool emit_block_move_via_movmem (rtx, rtx, rtx, unsigned);
! static rtx emit_block_move_via_libcall (rtx, rtx, rtx, bool);
  static tree emit_block_move_libcall_fn (int);
  static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned);
  static rtx clear_by_pieces_1 (void *, HOST_WIDE_INT, enum machine_mode);
--- 126,132 ----
  static void move_by_pieces_1 (rtx (*) (rtx, ...), enum machine_mode,
  			      struct move_by_pieces *);
  static bool block_move_libcall_safe_for_call_parm (void);
! static bool emit_block_move_via_movmem (rtx, rtx, rtx, unsigned, unsigned, HOST_WIDE_INT);
  static tree emit_block_move_libcall_fn (int);
  static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned);
  static rtx clear_by_pieces_1 (void *, HOST_WIDE_INT, enum machine_mode);
*************** static void clear_by_pieces (rtx, unsign
*** 135,141 ****
  static void store_by_pieces_1 (struct store_by_pieces *, unsigned int);
  static void store_by_pieces_2 (rtx (*) (rtx, ...), enum machine_mode,
  			       struct store_by_pieces *);
- static rtx clear_storage_via_libcall (rtx, rtx, bool);
  static tree clear_storage_libcall_fn (int);
  static rtx compress_float_constant (rtx, rtx);
  static rtx get_subtarget (rtx);
--- 134,139 ----
*************** move_by_pieces_1 (rtx (*genfun) (rtx, ..
*** 1149,1155 ****
     0 otherwise.  */
  
  rtx
! emit_block_move (rtx x, rtx y, rtx size, enum block_op_methods method)
  {
    bool may_use_call;
    rtx retval = 0;
--- 1147,1154 ----
     0 otherwise.  */
  
  rtx
! emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method,
! 		       unsigned int expected_align, HOST_WIDE_INT expected_size)
  {
    bool may_use_call;
    rtx retval = 0;
*************** emit_block_move (rtx x, rtx y, rtx size,
*** 1204,1210 ****
  
    if (GET_CODE (size) == CONST_INT && MOVE_BY_PIECES_P (INTVAL (size), align))
      move_by_pieces (x, y, INTVAL (size), align, 0);
!   else if (emit_block_move_via_movmem (x, y, size, align))
      ;
    else if (may_use_call)
      retval = emit_block_move_via_libcall (x, y, size,
--- 1203,1210 ----
  
    if (GET_CODE (size) == CONST_INT && MOVE_BY_PIECES_P (INTVAL (size), align))
      move_by_pieces (x, y, INTVAL (size), align, 0);
!   else if (emit_block_move_via_movmem (x, y, size, align,
! 				       expected_align, expected_size))
      ;
    else if (may_use_call)
      retval = emit_block_move_via_libcall (x, y, size,
*************** emit_block_move (rtx x, rtx y, rtx size,
*** 1218,1223 ****
--- 1218,1229 ----
    return retval;
  }
  
+ rtx
+ emit_block_move (rtx x, rtx y, rtx size, enum block_op_methods method)
+ {
+   return emit_block_move_hints (x, y, size, method, 0, -1);
+ }
+ 
  /* A subroutine of emit_block_move.  Returns true if calling the
     block move libcall will not clobber any parameters which may have
     already been placed on the stack.  */
*************** block_move_libcall_safe_for_call_parm (v
*** 1268,1279 ****
     return true if successful.  */
  
  static bool
! emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align)
  {
    rtx opalign = GEN_INT (align / BITS_PER_UNIT);
    int save_volatile_ok = volatile_ok;
    enum machine_mode mode;
  
    /* Since this is a move insn, we don't care about volatility.  */
    volatile_ok = 1;
  
--- 1274,1289 ----
     return true if successful.  */
  
  static bool
! emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align,
! 			    unsigned int expected_align, HOST_WIDE_INT expected_size)
  {
    rtx opalign = GEN_INT (align / BITS_PER_UNIT);
    int save_volatile_ok = volatile_ok;
    enum machine_mode mode;
  
+   if (expected_align < align)
+     expected_align = align;
+ 
    /* Since this is a move insn, we don't care about volatility.  */
    volatile_ok = 1;
  
*************** emit_block_move_via_movmem (rtx x, rtx y
*** 1317,1323 ****
  	     that it doesn't fail the expansion because it thinks
  	     emitting the libcall would be more efficient.  */
  
! 	  pat = GEN_FCN ((int) code) (x, y, op2, opalign);
  	  if (pat)
  	    {
  	      emit_insn (pat);
--- 1327,1338 ----
  	     that it doesn't fail the expansion because it thinks
  	     emitting the libcall would be more efficient.  */
  
! 	  if (insn_data[(int) code].n_operands == 4)
! 	    pat = GEN_FCN ((int) code) (x, y, op2, opalign);
! 	  else
! 	    pat = GEN_FCN ((int) code) (x, y, op2, opalign,
! 					GEN_INT (expected_align),
! 					GEN_INT (expected_size));
  	  if (pat)
  	    {
  	      emit_insn (pat);
*************** emit_block_move_via_movmem (rtx x, rtx y
*** 1336,1342 ****
  /* A subroutine of emit_block_move.  Expand a call to memcpy.
     Return the return value from memcpy, 0 otherwise.  */
  
! static rtx
  emit_block_move_via_libcall (rtx dst, rtx src, rtx size, bool tailcall)
  {
    rtx dst_addr, src_addr;
--- 1351,1357 ----
  /* A subroutine of emit_block_move.  Expand a call to memcpy.
     Return the return value from memcpy, 0 otherwise.  */
  
! rtx
  emit_block_move_via_libcall (rtx dst, rtx src, rtx size, bool tailcall)
  {
    rtx dst_addr, src_addr;
*************** store_by_pieces_2 (rtx (*genfun) (rtx, .
*** 2497,2503 ****
     its length in bytes.  */
  
  rtx
! clear_storage (rtx object, rtx size, enum block_op_methods method)
  {
    enum machine_mode mode = GET_MODE (object);
    unsigned int align;
--- 2512,2519 ----
     its length in bytes.  */
  
  rtx
! clear_storage_hints (rtx object, rtx size, enum block_op_methods method,
! 		     unsigned int expected_align, HOST_WIDE_INT expected_size)
  {
    enum machine_mode mode = GET_MODE (object);
    unsigned int align;
*************** clear_storage (rtx object, rtx size, enu
*** 2537,2543 ****
    if (GET_CODE (size) == CONST_INT
        && CLEAR_BY_PIECES_P (INTVAL (size), align))
      clear_by_pieces (object, INTVAL (size), align);
!   else if (set_storage_via_setmem (object, size, const0_rtx, align))
      ;
    else
      return clear_storage_via_libcall (object, size,
--- 2553,2560 ----
    if (GET_CODE (size) == CONST_INT
        && CLEAR_BY_PIECES_P (INTVAL (size), align))
      clear_by_pieces (object, INTVAL (size), align);
!   else if (set_storage_via_setmem (object, size, const0_rtx, align,
! 				   expected_align, expected_size))
      ;
    else
      return clear_storage_via_libcall (object, size,
*************** clear_storage (rtx object, rtx size, enu
*** 2546,2555 ****
    return NULL;
  }
  
  /* A subroutine of clear_storage.  Expand a call to memset.
     Return the return value of memset, 0 otherwise.  */
  
! static rtx
  clear_storage_via_libcall (rtx object, rtx size, bool tailcall)
  {
    tree call_expr, arg_list, fn, object_tree, size_tree;
--- 2563,2578 ----
    return NULL;
  }
  
+ rtx
+ clear_storage (rtx object, rtx size, enum block_op_methods method)
+ {
+   return clear_storage_hints (object, size, method, 0, -1);
+ }
+ 
  /* A subroutine of clear_storage.  Expand a call to memset.
     Return the return value of memset, 0 otherwise.  */
  
! rtx
  clear_storage_via_libcall (rtx object, rtx size, bool tailcall)
  {
    tree call_expr, arg_list, fn, object_tree, size_tree;
*************** clear_storage_libcall_fn (int for_call)
*** 2644,2650 ****
  /* Expand a setmem pattern; return true if successful.  */
  
  bool
! set_storage_via_setmem (rtx object, rtx size, rtx val, unsigned int align)
  {
    /* Try the most limited insn first, because there's no point
       including more than one in the machine description unless
--- 2667,2674 ----
  /* Expand a setmem pattern; return true if successful.  */
  
  bool
! set_storage_via_setmem (rtx object, rtx size, rtx val, unsigned int align,
! 			unsigned int expected_align, HOST_WIDE_INT expected_size)
  {
    /* Try the most limited insn first, because there's no point
       including more than one in the machine description unless
*************** set_storage_via_setmem (rtx object, rtx 
*** 2653,2658 ****
--- 2677,2685 ----
    rtx opalign = GEN_INT (align / BITS_PER_UNIT);
    enum machine_mode mode;
  
+   if (expected_align < align)
+     expected_align = align;
+ 
    for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
         mode = GET_MODE_WIDER_MODE (mode))
      {
*************** set_storage_via_setmem (rtx object, rtx 
*** 2693,2699 ****
  		opchar = copy_to_mode_reg (char_mode, opchar);
  	    }
  
! 	  pat = GEN_FCN ((int) code) (object, opsize, opchar, opalign);
  	  if (pat)
  	    {
  	      emit_insn (pat);
--- 2720,2731 ----
  		opchar = copy_to_mode_reg (char_mode, opchar);
  	    }
  
! 	  if (insn_data[(int) code].n_operands == 4)
! 	    pat = GEN_FCN ((int) code) (object, opsize, opchar, opalign);
! 	  else
! 	    pat = GEN_FCN ((int) code) (object, opsize, opchar, opalign,
! 					GEN_INT (expected_align),
! 					GEN_INT (expected_size));
  	  if (pat)
  	    {
  	      emit_insn (pat);
Index: expr.h
===================================================================
*** expr.h	(revision 118067)
--- expr.h	(working copy)
*************** extern void init_block_move_fn (const ch
*** 377,382 ****
--- 377,385 ----
  extern void init_block_clear_fn (const char *);
  
  extern rtx emit_block_move (rtx, rtx, rtx, enum block_op_methods);
+ extern rtx emit_block_move_via_libcall (rtx, rtx, rtx, bool);
+ extern rtx emit_block_move_hints (rtx, rtx, rtx, enum block_op_methods,
+ 			          unsigned int, HOST_WIDE_INT);
  
  /* Copy all or part of a value X into registers starting at REGNO.
     The number of registers to be filled is NREGS.  */
*************** extern void use_group_regs (rtx *, rtx);
*** 423,431 ****
  /* Write zeros through the storage of OBJECT.
     If OBJECT has BLKmode, SIZE is its length in bytes.  */
  extern rtx clear_storage (rtx, rtx, enum block_op_methods);
  
  /* Expand a setmem pattern; return true if successful.  */
! extern bool set_storage_via_setmem (rtx, rtx, rtx, unsigned int);
  
  /* Determine whether the LEN bytes can be moved by using several move
     instructions.  Return nonzero if a call to move_by_pieces should
--- 426,440 ----
  /* Write zeros through the storage of OBJECT.
     If OBJECT has BLKmode, SIZE is its length in bytes.  */
  extern rtx clear_storage (rtx, rtx, enum block_op_methods);
+ extern rtx clear_storage_hints (rtx, rtx, enum block_op_methods,
+ 			        unsigned int, HOST_WIDE_INT);
+ 
+ /* The same, but always output a library call.  */
+ rtx clear_storage_via_libcall (rtx, rtx, bool);
  
  /* Expand a setmem pattern; return true if successful.  */
! extern bool set_storage_via_setmem (rtx, rtx, rtx, unsigned int,
! 				    unsigned int, HOST_WIDE_INT);
  
  /* Determine whether the LEN bytes can be moved by using several move
     instructions.  Return nonzero if a call to move_by_pieces should
Index: doc/md.texi
===================================================================
*** doc/md.texi	(revision 118067)
--- doc/md.texi	(working copy)
*************** destination, in the form of a @code{cons
*** 3806,3811 ****
--- 3806,3816 ----
  compiler knows that both source and destination are word-aligned,
  it may provide the value 4 for this operand.
  
+ Optional operands 5 and 6 specify expected alignment and size of block
+ respectively.  The expected alignment differs from alignment in operand 4
+ in a way that the blocks are not required to be aligned according to it in
+ all cases. Expected size, when unknown, is set to @code{(const_int -1)}.
+ 
  Descriptions of multiple @code{movmem@var{m}} patterns can only be
  beneficial if the patterns for smaller modes have fewer restrictions
  on their first, second and fourth operands.  Note that the mode @var{m}
*************** of a @code{const_int} rtx.  Thus, if the
*** 3838,3843 ****
--- 3843,3853 ----
  destination is word-aligned, it may provide the value 4 for this
  operand.
  
+ Optional operands 5 and 6 specify expected alignment and size of block
+ respectively.  The expected alignment differs from alignment in operand 4
+ in a way that the blocks are not required to be aligned according to it in
+ all cases. Expected size, when unknown, is set to @code{(const_int -1)}.
+ 
  The use for multiple @code{setmem@var{m}} is as for @code{movmem@var{m}}.
  
  @cindex @code{cmpstrn@var{m}} instruction pattern


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]