This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[RFC] Memcpy/memset profiling infrastructure


Hi,
I am sending this patch as an RFC because it would at least need a mechanism added to hide the internal builtin functions from the user.

What I am aiming for is to record histogram information during profiling
about the expected size and alignment of memcpy/memset blocks, to be used later
at RTL expansion time.  I have a patch that allows choosing the proper memcpy
algorithm (i.e. rep/movs, loop, unrolled loop or libcall) in the x86 backend based
on this info.

The problem lies in annotating the call with the histogram.  What I do is
simply add variants builtin_memcpy_hints/builtin_memset_hints that accept
this extra information as additional arguments.  This is very non-intrusive to
the rest of the middle-end but has the disadvantage that it works only for explicit
memset/memcpy calls (i.e. not for structure assignments, where alignment would
still be interesting, but not as much as in the generic case), and it would be
moderately painful to add similar profiling to other builtins (in my profiling
code, not included in this patch, only memset/memcpy/bzero are profiled) because new
alternatives need to be introduced.

I discussed this briefly at the GCC summit with Rth and we didn't find a better
way around it.  Possibly if we get rid of TER, it would be more convenient to
attach the profiles to statements and use them at expansion time, but even that
has problems, since updating the histograms would need some care.

If no one comes up with a good idea, I will add the bits to keep the functions from
being user visible (what is the best way to do this, BTW?) and send an updated patch at
the beginning of next week.

Honza

	* tree.c (build_common_builtin_nodes): Add memcpy_hints and memset_hints.
	* tree.h (validate_arglist): Declare.
	* builtins.c (expand_builtin_memcpy, expand_builtin_memset): Add decl
	argument; handle hints.
	(expand_builtin): Handle hints variants.
	(fold_builtin_memory_op): Accept histograms.
	* builtin-types.def
	(BT_FN_PTR_PTR_CONST_PTR_SIZE_SIZE_SIZE_SIZE,
	BT_FN_PTR_PTR_INT_SIZE_SIZE_SIZE_SIZE): Define.
	* builtins.def (BUILT_IN_MEMCPY_HINTS, BUILT_IN_MEMSET_HINTS): Declare.
	* expr.c (emit_block_move_via_movmem, emit_block_move_via_libcall): Add
	variant handling histograms; add wrapper.
	(clear_storage_via_libcall): Export.
	(emit_block_move_hints): Break out from ...; add histograms.
	(emit_block_move): ... this one.
	(clear_storage_hints): Break out from ...; add histograms.
	(clear_storage): ... this one.
	(set_storage_via_setmem): Handle histogram.
	* expr.h (emit_block_move_via_libcall, emit_block_move_hints): Declare.
	(clear_storage_hints, clear_storage_via_libcall): Declare.
	(set_storage_via_setmem): Update prototype.
Index: tree.c
===================================================================
*** tree.c	(revision 118043)
--- tree.c	(working copy)
*************** build_common_builtin_nodes (void)
*** 6676,6681 ****
--- 6676,6695 ----
  	local_define_builtin ("__builtin_memmove", ftype, BUILT_IN_MEMMOVE,
  			      "memmove", ECF_NOTHROW);
      }
+   if (built_in_decls[BUILT_IN_MEMCPY_HINTS] == NULL)
+     {
+       tmp = tree_cons (NULL_TREE, size_type_node, void_list_node);
+       tmp = tree_cons (NULL_TREE, const_ptr_type_node, tmp);
+       tmp = tree_cons (NULL_TREE, ptr_type_node, tmp);
+       tmp = tree_cons (NULL_TREE, ptr_type_node, tmp);
+       tmp = tree_cons (NULL_TREE, ptr_type_node, tmp);
+       tmp = tree_cons (NULL_TREE, ptr_type_node, tmp);
+       ftype = build_function_type (ptr_type_node, tmp);
+ 
+       if (built_in_decls[BUILT_IN_MEMCPY_HINTS] == NULL)
+ 	local_define_builtin ("__builtin_memcpy_hints", ftype, BUILT_IN_MEMCPY,
+ 			      "memcpy", ECF_NOTHROW);
+     }
  
    if (built_in_decls[BUILT_IN_MEMCMP] == NULL)
      {
*************** build_common_builtin_nodes (void)
*** 6696,6701 ****
--- 6710,6727 ----
        local_define_builtin ("__builtin_memset", ftype, BUILT_IN_MEMSET,
  			    "memset", ECF_NOTHROW);
      }
+   if (built_in_decls[BUILT_IN_MEMSET_HINTS] == NULL)
+     {
+       tmp = tree_cons (NULL_TREE, size_type_node, void_list_node);
+       tmp = tree_cons (NULL_TREE, integer_type_node, tmp);
+       tmp = tree_cons (NULL_TREE, ptr_type_node, tmp);
+       tmp = tree_cons (NULL_TREE, ptr_type_node, tmp);
+       tmp = tree_cons (NULL_TREE, ptr_type_node, tmp);
+       tmp = tree_cons (NULL_TREE, ptr_type_node, tmp);
+       ftype = build_function_type (ptr_type_node, tmp);
+       local_define_builtin ("__builtin_memset_hints", ftype, BUILT_IN_MEMSET,
+ 			    "memset", ECF_NOTHROW);
+     }
  
    if (built_in_decls[BUILT_IN_ALLOCA] == NULL)
      {
Index: tree.h
===================================================================
*** tree.h	(revision 118043)
--- tree.h	(working copy)
*************** extern tree strip_float_extensions (tree
*** 4296,4302 ****
  extern tree c_strlen (tree, int);
  extern tree std_gimplify_va_arg_expr (tree, tree, tree *, tree *);
  extern tree build_va_arg_indirect_ref (tree);
! tree build_string_literal (int, const char *);
  
  /* In convert.c */
  extern tree strip_float_extensions (tree);
--- 4296,4303 ----
  extern tree c_strlen (tree, int);
  extern tree std_gimplify_va_arg_expr (tree, tree, tree *, tree *);
  extern tree build_va_arg_indirect_ref (tree);
! extern tree build_string_literal (int, const char *);
! extern int validate_arglist (tree arglist, ...);
  
  /* In convert.c */
  extern tree strip_float_extensions (tree);
Index: builtins.c
===================================================================
*** builtins.c	(revision 118043)
--- builtins.c	(working copy)
*************** static rtx expand_builtin_strcat (tree, 
*** 109,115 ****
  static rtx expand_builtin_strncat (tree, rtx, enum machine_mode);
  static rtx expand_builtin_strspn (tree, rtx, enum machine_mode);
  static rtx expand_builtin_strcspn (tree, rtx, enum machine_mode);
! static rtx expand_builtin_memcpy (tree, rtx, enum machine_mode);
  static rtx expand_builtin_mempcpy (tree, tree, rtx, enum machine_mode, int);
  static rtx expand_builtin_memmove (tree, tree, rtx, enum machine_mode);
  static rtx expand_builtin_bcopy (tree);
--- 109,115 ----
  static rtx expand_builtin_strncat (tree, rtx, enum machine_mode);
  static rtx expand_builtin_strspn (tree, rtx, enum machine_mode);
  static rtx expand_builtin_strcspn (tree, rtx, enum machine_mode);
! static rtx expand_builtin_memcpy (enum built_in_function, tree, rtx, enum machine_mode);
  static rtx expand_builtin_mempcpy (tree, tree, rtx, enum machine_mode, int);
  static rtx expand_builtin_memmove (tree, tree, rtx, enum machine_mode);
  static rtx expand_builtin_bcopy (tree);
*************** static rtx builtin_strncpy_read_str (voi
*** 119,125 ****
  static rtx expand_builtin_strncpy (tree, rtx, enum machine_mode);
  static rtx builtin_memset_read_str (void *, HOST_WIDE_INT, enum machine_mode);
  static rtx builtin_memset_gen_str (void *, HOST_WIDE_INT, enum machine_mode);
! static rtx expand_builtin_memset (tree, rtx, enum machine_mode, tree);
  static rtx expand_builtin_bzero (tree);
  static rtx expand_builtin_strlen (tree, rtx, enum machine_mode);
  static rtx expand_builtin_strstr (tree, tree, rtx, enum machine_mode);
--- 119,125 ----
  static rtx expand_builtin_strncpy (tree, rtx, enum machine_mode);
  static rtx builtin_memset_read_str (void *, HOST_WIDE_INT, enum machine_mode);
  static rtx builtin_memset_gen_str (void *, HOST_WIDE_INT, enum machine_mode);
! static rtx expand_builtin_memset (enum built_in_function, tree, rtx, enum machine_mode, tree);
  static rtx expand_builtin_bzero (tree);
  static rtx expand_builtin_strlen (tree, rtx, enum machine_mode);
  static rtx expand_builtin_strstr (tree, tree, rtx, enum machine_mode);
*************** static tree fold_builtin_classify_type (
*** 140,146 ****
  static tree fold_builtin_strlen (tree);
  static tree fold_builtin_inf (tree, int);
  static tree fold_builtin_nan (tree, tree, int);
- static int validate_arglist (tree, ...);
  static bool integer_valued_real_p (tree);
  static tree fold_trunc_transparent_mathfn (tree, tree);
  static bool readonly_data_expr (tree);
--- 140,145 ----
*************** builtin_memcpy_read_str (void *data, HOS
*** 2899,2910 ****
     otherwise try to get the result in TARGET, if convenient (and in
     mode MODE if that's convenient).  */
  static rtx
! expand_builtin_memcpy (tree exp, rtx target, enum machine_mode mode)
  {
    tree fndecl = get_callee_fndecl (exp);
    tree arglist = TREE_OPERAND (exp, 1);
!   if (!validate_arglist (arglist,
! 			 POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
      return 0;
    else
      {
--- 2898,2917 ----
     otherwise try to get the result in TARGET, if convenient (and in
     mode MODE if that's convenient).  */
  static rtx
! expand_builtin_memcpy (enum built_in_function fcode, tree exp, rtx target, enum machine_mode mode)
  {
    tree fndecl = get_callee_fndecl (exp);
    tree arglist = TREE_OPERAND (exp, 1);
!   if (fcode == BUILT_IN_MEMCPY
!       && !validate_arglist (arglist,
! 			    POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE,
! 			    VOID_TYPE))
!     return 0;
!   else if (fcode == BUILT_IN_MEMCPY_HINTS
! 	   && !validate_arglist (arglist,
! 				 POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE,
! 				 INTEGER_TYPE, INTEGER_TYPE, INTEGER_TYPE,
! 				 VOID_TYPE))
      return 0;
    else
      {
*************** expand_builtin_memcpy (tree exp, rtx tar
*** 2916,2923 ****
        unsigned int dest_align
  	= get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
        rtx dest_mem, src_mem, dest_addr, len_rtx;
!       tree result = fold_builtin_memory_op (arglist, TREE_TYPE (TREE_TYPE (fndecl)),
  					    false, /*endp=*/0);
  
        if (result)
  	{
--- 2923,2935 ----
        unsigned int dest_align
  	= get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
        rtx dest_mem, src_mem, dest_addr, len_rtx;
!       tree result = fold_builtin_memory_op (arglist,
! 					    TREE_TYPE (TREE_TYPE (fndecl)),
  					    false, /*endp=*/0);
+       HOST_WIDE_INT expected_size = -1, expected_align = -1;
+ 
+       if (expected_align < (int)dest_align)
+ 	dest_align = expected_align;
  
        if (result)
  	{
*************** expand_builtin_memcpy (tree exp, rtx tar
*** 2939,2944 ****
--- 2951,2981 ----
        if (src_align == 0)
  	return 0;
  
+       if (fcode == BUILT_IN_MEMCPY_HINTS)
+ 	{
+ 	  tree known_align_exp = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
+ 	  tree expected_align_exp = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist)))));
+ 	  tree expected_size_exp = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))))));
+ 	  unsigned int known_align = 0;
+ 
+ 	  if (TREE_CODE (known_align_exp) != INTEGER_CST)
+ 	    error ("argument of %<__builtin_memset_hints%> must be constant");
+ 	  else
+ 	    known_align = int_cst_value (known_align_exp);
+ 	  if (TREE_CODE (expected_align_exp) != INTEGER_CST)
+ 	    error ("argument of %<__builtin_memset_hints%> must be constant");
+ 	  else
+ 	    expected_align = int_cst_value (expected_align_exp);
+ 	  if (TREE_CODE (expected_size_exp) != INTEGER_CST)
+ 	    error ("argument of %<__builtin_memset_hints%> must be constant");
+ 	  else
+ 	    expected_size = int_cst_value (expected_size_exp);
+ 	  if (known_align > dest_align)
+ 	    dest_align = known_align;
+ 	  if (expected_size >= 0)
+ 	    fprintf (stderr, "Expanding with %i\n", (int) expected_size);
+ 	}
+ 
        dest_mem = get_memory_rtx (dest, len);
        set_mem_align (dest_mem, dest_align);
        len_rtx = expand_normal (len);
*************** expand_builtin_memcpy (tree exp, rtx tar
*** 2965,2973 ****
        set_mem_align (src_mem, src_align);
  
        /* Copy word part most expediently.  */
!       dest_addr = emit_block_move (dest_mem, src_mem, len_rtx,
! 				   CALL_EXPR_TAILCALL (exp)
! 				   ? BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL);
  
        if (dest_addr == 0)
  	{
--- 3002,3011 ----
        set_mem_align (src_mem, src_align);
  
        /* Copy word part most expediently.  */
!       dest_addr = emit_block_move_hints (dest_mem, src_mem, len_rtx,
! 				         CALL_EXPR_TAILCALL (exp)
! 				         ? BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL,
! 					 expected_align, expected_size);
  
        if (dest_addr == 0)
  	{
*************** builtin_memset_gen_str (void *data, HOST
*** 3443,3466 ****
     convenient).  */
  
  static rtx
! expand_builtin_memset (tree arglist, rtx target, enum machine_mode mode,
! 		       tree orig_exp)
  {
!   if (!validate_arglist (arglist,
! 			 POINTER_TYPE, INTEGER_TYPE, INTEGER_TYPE, VOID_TYPE))
      return 0;
    else
      {
        tree dest = TREE_VALUE (arglist);
        tree val = TREE_VALUE (TREE_CHAIN (arglist));
        tree len = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
!       tree fndecl, fn;
!       enum built_in_function fcode;
        char c;
        unsigned int dest_align;
        rtx dest_mem, dest_addr, len_rtx;
  
        dest_align = get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
  
        /* If DEST is not a pointer type, don't do this
  	 operation in-line.  */
--- 3481,3538 ----
     convenient).  */
  
  static rtx
! expand_builtin_memset (enum built_in_function fcode, tree arglist, rtx target,
! 		       enum machine_mode mode, tree orig_exp)
  {
!   if (fcode == BUILT_IN_MEMSET
!       && !validate_arglist (arglist,
! 			    POINTER_TYPE, INTEGER_TYPE, INTEGER_TYPE, VOID_TYPE))
!     return 0;
!   else if (fcode == BUILT_IN_MEMSET_HINTS
! 	   && !validate_arglist (arglist,
! 				 POINTER_TYPE, INTEGER_TYPE, INTEGER_TYPE,
! 				 INTEGER_TYPE, INTEGER_TYPE, INTEGER_TYPE, VOID_TYPE))
      return 0;
    else
      {
        tree dest = TREE_VALUE (arglist);
        tree val = TREE_VALUE (TREE_CHAIN (arglist));
        tree len = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
!       tree fndecl = get_callee_fndecl (orig_exp);
!       tree fn;
!       enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl);
        char c;
        unsigned int dest_align;
        rtx dest_mem, dest_addr, len_rtx;
+       HOST_WIDE_INT expected_size = -1, expected_align = -1;
  
        dest_align = get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
+       if (fcode == BUILT_IN_MEMSET_HINTS)
+ 	{
+ 	  tree known_align_exp = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
+ 	  tree expected_align_exp = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist)))));
+ 	  tree expected_size_exp = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))))));
+ 	  unsigned int known_align = 0;
+ 
+ 	  if (TREE_CODE (known_align_exp) != INTEGER_CST)
+ 	    error ("argument of %<__builtin_memset_hints%> must be constant");
+ 	  else
+ 	    known_align = int_cst_value (known_align_exp);
+ 	  if (TREE_CODE (expected_align_exp) != INTEGER_CST)
+ 	    error ("argument of %<__builtin_memset_hints%> must be constant");
+ 	  else
+ 	    expected_align = int_cst_value (expected_align_exp);
+ 	  if (TREE_CODE (expected_size_exp) != INTEGER_CST)
+ 	    error ("argument of %<__builtin_memset_hints%> must be constant");
+ 	  else
+ 	    expected_size = int_cst_value (expected_size_exp);
+ 	  if (known_align > dest_align)
+ 	    dest_align = known_align;
+ 	  if (expected_size >= 0)
+ 	    fprintf (stderr, "Expanding with %i\n", (int) expected_size);
+ 	}
+       if (expected_align < (int)dest_align)
+ 	dest_align = expected_align;
  
        /* If DEST is not a pointer type, don't do this
  	 operation in-line.  */
*************** expand_builtin_memset (tree arglist, rtx
*** 3506,3512 ****
  			       builtin_memset_gen_str, val_rtx, dest_align, 0);
  	    }
  	  else if (!set_storage_via_setmem (dest_mem, len_rtx, val_rtx,
! 					    dest_align))
  	    goto do_libcall;
  
  	  dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
--- 3578,3585 ----
  			       builtin_memset_gen_str, val_rtx, dest_align, 0);
  	    }
  	  else if (!set_storage_via_setmem (dest_mem, len_rtx, val_rtx,
! 					    dest_align, expected_align,
! 					    expected_size))
  	    goto do_libcall;
  
  	  dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
*************** expand_builtin_memset (tree arglist, rtx
*** 3526,3532 ****
  	    store_by_pieces (dest_mem, tree_low_cst (len, 1),
  			     builtin_memset_read_str, &c, dest_align, 0);
  	  else if (!set_storage_via_setmem (dest_mem, len_rtx, GEN_INT (c),
! 					    dest_align))
  	    goto do_libcall;
  
  	  dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
--- 3599,3606 ----
  	    store_by_pieces (dest_mem, tree_low_cst (len, 1),
  			     builtin_memset_read_str, &c, dest_align, 0);
  	  else if (!set_storage_via_setmem (dest_mem, len_rtx, GEN_INT (c),
! 					    dest_align, expected_align,
! 					    expected_size))
  	    goto do_libcall;
  
  	  dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
*************** expand_builtin_memset (tree arglist, rtx
*** 3549,3555 ****
  
      do_libcall:
        fndecl = get_callee_fndecl (orig_exp);
-       fcode = DECL_FUNCTION_CODE (fndecl);
        gcc_assert (fcode == BUILT_IN_MEMSET || fcode == BUILT_IN_BZERO);
        arglist = build_tree_list (NULL_TREE, len);
        if (fcode == BUILT_IN_MEMSET)
--- 3623,3628 ----
*************** expand_builtin_bzero (tree exp)
*** 3586,3592 ****
    newarglist = tree_cons (NULL_TREE, integer_zero_node, newarglist);
    newarglist = tree_cons (NULL_TREE, dest, newarglist);
  
!   return expand_builtin_memset (newarglist, const0_rtx, VOIDmode, exp);
  }
  
  /* Expand expression EXP, which is a call to the memcmp built-in function.
--- 3659,3665 ----
    newarglist = tree_cons (NULL_TREE, integer_zero_node, newarglist);
    newarglist = tree_cons (NULL_TREE, dest, newarglist);
  
!   return expand_builtin_memset (BUILT_IN_MEMSET, newarglist, const0_rtx, VOIDmode, exp);
  }
  
  /* Expand expression EXP, which is a call to the memcmp built-in function.
*************** expand_builtin (tree exp, rtx target, rt
*** 6015,6022 ****
  	return target;
        break;
  
      case BUILT_IN_MEMCPY:
!       target = expand_builtin_memcpy (exp, target, mode);
        if (target)
  	return target;
        break;
--- 6088,6096 ----
  	return target;
        break;
  
+     case BUILT_IN_MEMCPY_HINTS:
      case BUILT_IN_MEMCPY:
!       target = expand_builtin_memcpy (fcode, exp, target, mode);
        if (target)
  	return target;
        break;
*************** expand_builtin (tree exp, rtx target, rt
*** 6041,6047 ****
        break;
  
      case BUILT_IN_MEMSET:
!       target = expand_builtin_memset (arglist, target, mode, exp);
        if (target)
  	return target;
        break;
--- 6115,6122 ----
        break;
  
      case BUILT_IN_MEMSET:
!     case BUILT_IN_MEMSET_HINTS:
!       target = expand_builtin_memset (fcode, arglist, target, mode, exp);
        if (target)
  	return target;
        break;
*************** fold_builtin_memory_op (tree arglist, tr
*** 8032,8038 ****
    unsigned HOST_WIDE_INT length;
  
    if (! validate_arglist (arglist,
! 			  POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
      return 0;
  
    dest = TREE_VALUE (arglist);
--- 8107,8117 ----
    unsigned HOST_WIDE_INT length;
  
    if (! validate_arglist (arglist,
! 			  POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE)
!       && ! validate_arglist (arglist,
! 			     POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE,
! 			     INTEGER_TYPE, INTEGER_TYPE, INTEGER_TYPE,
! 			     VOID_TYPE))
      return 0;
  
    dest = TREE_VALUE (arglist);
*************** build_function_call_expr (tree fn, tree 
*** 9299,9305 ****
     of tree_codes.  If the last specifier is a 0, that represents an
     ellipses, otherwise the last specifier must be a VOID_TYPE.  */
  
! static int
  validate_arglist (tree arglist, ...)
  {
    enum tree_code code;
--- 9378,9384 ----
     of tree_codes.  If the last specifier is a 0, that represents an
     ellipses, otherwise the last specifier must be a VOID_TYPE.  */
  
! int
  validate_arglist (tree arglist, ...)
  {
    enum tree_code code;
Index: builtin-types.def
===================================================================
*** builtin-types.def	(revision 118043)
--- builtin-types.def	(working copy)
*************** DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_LONG_LON
*** 393,398 ****
--- 393,402 ----
  DEF_FUNCTION_TYPE_6 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG,
  		     BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT,
  		     BT_LONG, BT_LONG, BT_LONG)
+ DEF_FUNCTION_TYPE_6 (BT_FN_PTR_PTR_CONST_PTR_SIZE_SIZE_SIZE_SIZE,
+ 		     BT_PTR, BT_PTR, BT_CONST_PTR, BT_SIZE, BT_SIZE, BT_SIZE, BT_SIZE)
+ DEF_FUNCTION_TYPE_6 (BT_FN_PTR_PTR_INT_SIZE_SIZE_SIZE_SIZE,
+ 		     BT_PTR, BT_PTR, BT_INT, BT_SIZE, BT_SIZE, BT_SIZE, BT_SIZE)
  
  DEF_FUNCTION_TYPE_7 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG,
  		     BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT,
Index: builtins.def
===================================================================
*** builtins.def	(revision 118043)
--- builtins.def	(working copy)
*************** DEF_EXT_LIB_BUILTIN    (BUILT_IN_BZERO, 
*** 498,506 ****
--- 498,510 ----
  DEF_EXT_LIB_BUILTIN    (BUILT_IN_INDEX, "index", BT_FN_STRING_CONST_STRING_INT, ATTR_PURE_NOTHROW_NONNULL)
  DEF_LIB_BUILTIN        (BUILT_IN_MEMCMP, "memcmp", BT_FN_INT_CONST_PTR_CONST_PTR_SIZE, ATTR_PURE_NOTHROW_NONNULL)
  DEF_LIB_BUILTIN        (BUILT_IN_MEMCPY, "memcpy", BT_FN_PTR_PTR_CONST_PTR_SIZE, ATTR_NOTHROW_NONNULL)
+ DEF_LIB_BUILTIN        (BUILT_IN_MEMCPY_HINTS, "memcpy_hints", BT_FN_PTR_PTR_CONST_PTR_SIZE_SIZE_SIZE_SIZE,
+ 			ATTR_NOTHROW_NONNULL)
  DEF_LIB_BUILTIN        (BUILT_IN_MEMMOVE, "memmove", BT_FN_PTR_PTR_CONST_PTR_SIZE, ATTR_NOTHROW_NONNULL)
  DEF_EXT_LIB_BUILTIN    (BUILT_IN_MEMPCPY, "mempcpy", BT_FN_PTR_PTR_CONST_PTR_SIZE, ATTR_NOTHROW_NONNULL)
  DEF_LIB_BUILTIN        (BUILT_IN_MEMSET, "memset", BT_FN_PTR_PTR_INT_SIZE, ATTR_NOTHROW_NONNULL)
+ DEF_LIB_BUILTIN        (BUILT_IN_MEMSET_HINTS, "memset_hints", BT_FN_PTR_PTR_INT_SIZE_SIZE_SIZE_SIZE,
+ 		        ATTR_NOTHROW_NONNULL)
  DEF_EXT_LIB_BUILTIN    (BUILT_IN_RINDEX, "rindex", BT_FN_STRING_CONST_STRING_INT, ATTR_PURE_NOTHROW_NONNULL)
  DEF_EXT_LIB_BUILTIN    (BUILT_IN_STPCPY, "stpcpy", BT_FN_STRING_STRING_CONST_STRING, ATTR_NOTHROW_NONNULL)
  DEF_EXT_LIB_BUILTIN    (BUILT_IN_STPNCPY, "stpncpy", BT_FN_STRING_STRING_CONST_STRING_SIZE, ATTR_NOTHROW_NONNULL)
Index: expr.c
===================================================================
*** expr.c	(revision 118043)
--- expr.c	(working copy)
*************** static unsigned HOST_WIDE_INT move_by_pi
*** 126,133 ****
  static void move_by_pieces_1 (rtx (*) (rtx, ...), enum machine_mode,
  			      struct move_by_pieces *);
  static bool block_move_libcall_safe_for_call_parm (void);
! static bool emit_block_move_via_movmem (rtx, rtx, rtx, unsigned);
! static rtx emit_block_move_via_libcall (rtx, rtx, rtx, bool);
  static tree emit_block_move_libcall_fn (int);
  static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned);
  static rtx clear_by_pieces_1 (void *, HOST_WIDE_INT, enum machine_mode);
--- 126,132 ----
  static void move_by_pieces_1 (rtx (*) (rtx, ...), enum machine_mode,
  			      struct move_by_pieces *);
  static bool block_move_libcall_safe_for_call_parm (void);
! static bool emit_block_move_via_movmem (rtx, rtx, rtx, unsigned, unsigned, HOST_WIDE_INT);
  static tree emit_block_move_libcall_fn (int);
  static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned);
  static rtx clear_by_pieces_1 (void *, HOST_WIDE_INT, enum machine_mode);
*************** static void clear_by_pieces (rtx, unsign
*** 135,141 ****
  static void store_by_pieces_1 (struct store_by_pieces *, unsigned int);
  static void store_by_pieces_2 (rtx (*) (rtx, ...), enum machine_mode,
  			       struct store_by_pieces *);
- static rtx clear_storage_via_libcall (rtx, rtx, bool);
  static tree clear_storage_libcall_fn (int);
  static rtx compress_float_constant (rtx, rtx);
  static rtx get_subtarget (rtx);
--- 134,139 ----
*************** move_by_pieces_1 (rtx (*genfun) (rtx, ..
*** 1149,1155 ****
     0 otherwise.  */
  
  rtx
! emit_block_move (rtx x, rtx y, rtx size, enum block_op_methods method)
  {
    bool may_use_call;
    rtx retval = 0;
--- 1147,1154 ----
     0 otherwise.  */
  
  rtx
! emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method,
! 		       unsigned int expected_align, HOST_WIDE_INT expected_size)
  {
    bool may_use_call;
    rtx retval = 0;
*************** emit_block_move (rtx x, rtx y, rtx size,
*** 1204,1210 ****
  
    if (GET_CODE (size) == CONST_INT && MOVE_BY_PIECES_P (INTVAL (size), align))
      move_by_pieces (x, y, INTVAL (size), align, 0);
!   else if (emit_block_move_via_movmem (x, y, size, align))
      ;
    else if (may_use_call)
      retval = emit_block_move_via_libcall (x, y, size,
--- 1203,1210 ----
  
    if (GET_CODE (size) == CONST_INT && MOVE_BY_PIECES_P (INTVAL (size), align))
      move_by_pieces (x, y, INTVAL (size), align, 0);
!   else if (emit_block_move_via_movmem (x, y, size, align,
! 				       expected_align, expected_size))
      ;
    else if (may_use_call)
      retval = emit_block_move_via_libcall (x, y, size,
*************** emit_block_move (rtx x, rtx y, rtx size,
*** 1218,1223 ****
--- 1218,1229 ----
    return retval;
  }
  
+ rtx
+ emit_block_move (rtx x, rtx y, rtx size, enum block_op_methods method)
+ {
+   return emit_block_move_hints (x, y, size, method, 0, -1);
+ }
+ 
  /* A subroutine of emit_block_move.  Returns true if calling the
     block move libcall will not clobber any parameters which may have
     already been placed on the stack.  */
*************** block_move_libcall_safe_for_call_parm (v
*** 1268,1279 ****
     return true if successful.  */
  
  static bool
! emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align)
  {
    rtx opalign = GEN_INT (align / BITS_PER_UNIT);
    int save_volatile_ok = volatile_ok;
    enum machine_mode mode;
  
    /* Since this is a move insn, we don't care about volatility.  */
    volatile_ok = 1;
  
--- 1274,1289 ----
     return true if successful.  */
  
  static bool
! emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align,
! 			    unsigned int expected_align, HOST_WIDE_INT expected_size)
  {
    rtx opalign = GEN_INT (align / BITS_PER_UNIT);
    int save_volatile_ok = volatile_ok;
    enum machine_mode mode;
  
+   if (expected_align < align)
+     expected_align = align;
+ 
    /* Since this is a move insn, we don't care about volatility.  */
    volatile_ok = 1;
  
*************** emit_block_move_via_movmem (rtx x, rtx y
*** 1317,1323 ****
  	     that it doesn't fail the expansion because it thinks
  	     emitting the libcall would be more efficient.  */
  
! 	  pat = GEN_FCN ((int) code) (x, y, op2, opalign);
  	  if (pat)
  	    {
  	      emit_insn (pat);
--- 1327,1338 ----
  	     that it doesn't fail the expansion because it thinks
  	     emitting the libcall would be more efficient.  */
  
! 	  if (insn_data[(int) code].n_operands == 4)
! 	    pat = GEN_FCN ((int) code) (x, y, op2, opalign);
! 	  else
! 	    pat = GEN_FCN ((int) code) (x, y, op2, opalign,
! 					GEN_INT (expected_align),
! 					GEN_INT (expected_size));
  	  if (pat)
  	    {
  	      emit_insn (pat);
*************** emit_block_move_via_movmem (rtx x, rtx y
*** 1336,1342 ****
  /* A subroutine of emit_block_move.  Expand a call to memcpy.
     Return the return value from memcpy, 0 otherwise.  */
  
! static rtx
  emit_block_move_via_libcall (rtx dst, rtx src, rtx size, bool tailcall)
  {
    rtx dst_addr, src_addr;
--- 1351,1357 ----
  /* A subroutine of emit_block_move.  Expand a call to memcpy.
     Return the return value from memcpy, 0 otherwise.  */
  
! rtx
  emit_block_move_via_libcall (rtx dst, rtx src, rtx size, bool tailcall)
  {
    rtx dst_addr, src_addr;
*************** store_by_pieces_2 (rtx (*genfun) (rtx, .
*** 2497,2503 ****
     its length in bytes.  */
  
  rtx
! clear_storage (rtx object, rtx size, enum block_op_methods method)
  {
    enum machine_mode mode = GET_MODE (object);
    unsigned int align;
--- 2512,2519 ----
     its length in bytes.  */
  
  rtx
! clear_storage_hints (rtx object, rtx size, enum block_op_methods method,
! 		     unsigned int expected_align, HOST_WIDE_INT expected_size)
  {
    enum machine_mode mode = GET_MODE (object);
    unsigned int align;
*************** clear_storage (rtx object, rtx size, enu
*** 2537,2543 ****
    if (GET_CODE (size) == CONST_INT
        && CLEAR_BY_PIECES_P (INTVAL (size), align))
      clear_by_pieces (object, INTVAL (size), align);
!   else if (set_storage_via_setmem (object, size, const0_rtx, align))
      ;
    else
      return clear_storage_via_libcall (object, size,
--- 2553,2560 ----
    if (GET_CODE (size) == CONST_INT
        && CLEAR_BY_PIECES_P (INTVAL (size), align))
      clear_by_pieces (object, INTVAL (size), align);
!   else if (set_storage_via_setmem (object, size, const0_rtx, align,
! 				   expected_align, expected_size))
      ;
    else
      return clear_storage_via_libcall (object, size,
*************** clear_storage (rtx object, rtx size, enu
*** 2546,2555 ****
    return NULL;
  }
  
  /* A subroutine of clear_storage.  Expand a call to memset.
     Return the return value of memset, 0 otherwise.  */
  
! static rtx
  clear_storage_via_libcall (rtx object, rtx size, bool tailcall)
  {
    tree call_expr, arg_list, fn, object_tree, size_tree;
--- 2563,2578 ----
    return NULL;
  }
  
+ rtx
+ clear_storage (rtx object, rtx size, enum block_op_methods method)
+ {
+   return clear_storage_hints (object, size, method, 0, -1);
+ }
+ 
  /* A subroutine of clear_storage.  Expand a call to memset.
     Return the return value of memset, 0 otherwise.  */
  
! rtx
  clear_storage_via_libcall (rtx object, rtx size, bool tailcall)
  {
    tree call_expr, arg_list, fn, object_tree, size_tree;
*************** clear_storage_libcall_fn (int for_call)
*** 2644,2650 ****
  /* Expand a setmem pattern; return true if successful.  */
  
  bool
! set_storage_via_setmem (rtx object, rtx size, rtx val, unsigned int align)
  {
    /* Try the most limited insn first, because there's no point
       including more than one in the machine description unless
--- 2667,2674 ----
  /* Expand a setmem pattern; return true if successful.  */
  
  bool
! set_storage_via_setmem (rtx object, rtx size, rtx val, unsigned int align,
! 			unsigned int expected_align, HOST_WIDE_INT expected_size)
  {
    /* Try the most limited insn first, because there's no point
       including more than one in the machine description unless
*************** set_storage_via_setmem (rtx object, rtx 
*** 2653,2658 ****
--- 2677,2685 ----
    rtx opalign = GEN_INT (align / BITS_PER_UNIT);
    enum machine_mode mode;
  
+   if (expected_align < align)
+     expected_align = align;
+ 
    for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
         mode = GET_MODE_WIDER_MODE (mode))
      {
*************** set_storage_via_setmem (rtx object, rtx 
*** 2693,2699 ****
  		opchar = copy_to_mode_reg (char_mode, opchar);
  	    }
  
! 	  pat = GEN_FCN ((int) code) (object, opsize, opchar, opalign);
  	  if (pat)
  	    {
  	      emit_insn (pat);
--- 2720,2731 ----
  		opchar = copy_to_mode_reg (char_mode, opchar);
  	    }
  
! 	  if (insn_data[(int) code].n_operands == 4)
! 	    pat = GEN_FCN ((int) code) (object, opsize, opchar, opalign);
! 	  else
! 	    pat = GEN_FCN ((int) code) (object, opsize, opchar, opalign,
! 					GEN_INT (expected_align),
! 					GEN_INT (expected_size));
  	  if (pat)
  	    {
  	      emit_insn (pat);
Index: expr.h
===================================================================
*** expr.h	(revision 118043)
--- expr.h	(working copy)
*************** extern void init_block_move_fn (const ch
*** 377,382 ****
--- 377,385 ----
  extern void init_block_clear_fn (const char *);
  
  extern rtx emit_block_move (rtx, rtx, rtx, enum block_op_methods);
+ extern rtx emit_block_move_via_libcall (rtx, rtx, rtx, bool);
+ extern rtx emit_block_move_hints (rtx, rtx, rtx, enum block_op_methods,
+ 			          unsigned int, HOST_WIDE_INT);
  
  /* Copy all or part of a value X into registers starting at REGNO.
     The number of registers to be filled is NREGS.  */
*************** extern void use_group_regs (rtx *, rtx);
*** 423,431 ****
  /* Write zeros through the storage of OBJECT.
     If OBJECT has BLKmode, SIZE is its length in bytes.  */
  extern rtx clear_storage (rtx, rtx, enum block_op_methods);
  
  /* Expand a setmem pattern; return true if successful.  */
! extern bool set_storage_via_setmem (rtx, rtx, rtx, unsigned int);
  
  /* Determine whether the LEN bytes can be moved by using several move
     instructions.  Return nonzero if a call to move_by_pieces should
--- 426,440 ----
  /* Write zeros through the storage of OBJECT.
     If OBJECT has BLKmode, SIZE is its length in bytes.  */
  extern rtx clear_storage (rtx, rtx, enum block_op_methods);
+ extern rtx clear_storage_hints (rtx, rtx, enum block_op_methods,
+ 			        unsigned int, HOST_WIDE_INT);
+ 
+ /* The same, but always output an library call.  */
+ rtx clear_storage_via_libcall (rtx, rtx, bool);
  
  /* Expand a setmem pattern; return true if successful.  */
! extern bool set_storage_via_setmem (rtx, rtx, rtx, unsigned int,
! 				    unsigned int, HOST_WIDE_INT);
  
  /* Determine whether the LEN bytes can be moved by using several move
     instructions.  Return nonzero if a call to move_by_pieces should


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]