[PATCH] Lift alignment restrictions from inlining register size memcpy


The following patch does $subject, which is requested in a comment in
PR50417, as this restriction defeats the purpose of having memcpy
as a portable and efficient way to circumvent strict-aliasing violations
(or even as a portable and efficient way to do unaligned loads).
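
For reference, a minimal sketch of the idiom PR50417 cares about
(the helper names load_u32 and float_bits are illustrative, not part
of the patch):

  #include <string.h>
  #include <stdint.h>

  /* Portable unaligned 32-bit load: memcpy avoids both the
     strict-aliasing violation and the alignment assumption of
     *(const uint32_t *)p.  With the patch this folds to a single
     register-sized move even when p is not 4-byte aligned.  */
  static inline uint32_t
  load_u32 (const void *p)
  {
    uint32_t v;
    memcpy (&v, p, sizeof v);
    return v;
  }

  /* Portable type punning: inspect a float's bit pattern without
     an aliasing violation.  */
  static inline uint32_t
  float_bits (float f)
  {
    uint32_t v;
    memcpy (&v, &f, sizeof v);
    return v;
  }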

Bootstrap / regtest is running on x86_64-unknown-linux-gnu (quite
pointless, as there is no change on that arch).

I have added a testcase that should exercise most relevant cases so
we can look for fallout on "interesting" targets.

Bootstrap / regtest on strict-alignment platforms welcome.

I do expect that with -Os expanding unaligned-to-unaligned moves
might be a size pessimization compared to a libcall (or to what
the target's block move expander does).  But the whole point is
to remove the abstraction penalty, so it isn't an easy stmt-local
decision to make.  Comments on this front are welcome as well.
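
To illustrate why it is not statement-local: once a helper like the
one sketched above is inlined into a loop, folding each copy to a
register-sized move is clearly a win, while the same copy seen in
isolation at -Os may well be smaller as a libcall on a
strict-alignment target.  A hypothetical sketch (sum_u32 is
illustrative):

  /* Each __builtin_memcpy below folds to one register-sized move;
     whether that beats calling memcpy depends on the caller, which
     a stmt-local decision in the folder cannot see.  */
  unsigned
  sum_u32 (const unsigned char *buf, int n)
  {
    unsigned s = 0;
    for (int i = 0; i < n; i++)
      {
        unsigned v;
        __builtin_memcpy (&v, buf + 4 * i, sizeof v);
        s += v;
      }
    return s;
  }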

Unless I hear some positives I'll let the patch sit here as I
don't really care too much about those pesky targets (and
targets can choose to opt in by providing movmisalign optabs
anyway, so they don't go the store/extract_bit_field path).

Thanks,
Richard.

2016-07-12  Richard Biener  <rguenther@suse.de>

	* gimple-fold.c (gimple_fold_builtin_memory_op): Lift alignment
	restrictions from inlining register size memcpy.

	* gcc.dg/torture/builtin-memcpy.c: New testcase.

Index: gcc/gimple-fold.c
===================================================================
*** gcc/gimple-fold.c	(revision 238237)
--- gcc/gimple-fold.c	(working copy)
*************** gimple_fold_builtin_memory_op (gimple_st
*** 705,718 ****
  	      if (type
  		  && TYPE_MODE (type) != BLKmode
  		  && (GET_MODE_SIZE (TYPE_MODE (type)) * BITS_PER_UNIT
! 		      == ilen * 8)
! 		  /* If the destination pointer is not aligned we must be able
! 		     to emit an unaligned store.  */
! 		  && (dest_align >= GET_MODE_ALIGNMENT (TYPE_MODE (type))
! 		      || !SLOW_UNALIGNED_ACCESS (TYPE_MODE (type), dest_align)
! 		      || (optab_handler (movmisalign_optab, TYPE_MODE (type))
! 			  != CODE_FOR_nothing)))
  		{
  		  tree srctype = type;
  		  tree desttype = type;
  		  if (src_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
--- 705,718 ----
  	      if (type
  		  && TYPE_MODE (type) != BLKmode
  		  && (GET_MODE_SIZE (TYPE_MODE (type)) * BITS_PER_UNIT
! 		      == ilen * 8))
  		{
+ 		  /* RTL expansion handles misaligned destination / source
+ 		     MEM_REFs either via target provided movmisalign or
+ 		     via extract/store_bit_field for targets that set
+ 		     SLOW_UNALIGNED_ACCESS for the move.  For move
+ 		     quantities up to MOVE_MAX this should be always
+ 		     more efficient than a libcall to memcpy.  */
  		  tree srctype = type;
  		  tree desttype = type;
  		  if (src_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
*************** gimple_fold_builtin_memory_op (gimple_st
*** 721,767 ****
  		  tree tem = fold_const_aggregate_ref (srcmem);
  		  if (tem)
  		    srcmem = tem;
! 		  else if (src_align < GET_MODE_ALIGNMENT (TYPE_MODE (type))
! 			   && SLOW_UNALIGNED_ACCESS (TYPE_MODE (type),
! 						     src_align)
! 			   && (optab_handler (movmisalign_optab,
! 					      TYPE_MODE (type))
! 			       == CODE_FOR_nothing))
! 		    srcmem = NULL_TREE;
! 		  if (srcmem)
  		    {
! 		      gimple *new_stmt;
! 		      if (is_gimple_reg_type (TREE_TYPE (srcmem)))
! 			{
! 			  new_stmt = gimple_build_assign (NULL_TREE, srcmem);
! 			  if (gimple_in_ssa_p (cfun))
! 			    srcmem = make_ssa_name (TREE_TYPE (srcmem),
! 						    new_stmt);
! 			  else
! 			    srcmem = create_tmp_reg (TREE_TYPE (srcmem));
! 			  gimple_assign_set_lhs (new_stmt, srcmem);
! 			  gimple_set_vuse (new_stmt, gimple_vuse (stmt));
! 			  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
! 			}
! 		      if (dest_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
! 			desttype = build_aligned_type (type, dest_align);
! 		      new_stmt
! 			= gimple_build_assign (fold_build2 (MEM_REF, desttype,
! 							    dest, off0),
! 					       srcmem);
  		      gimple_set_vuse (new_stmt, gimple_vuse (stmt));
- 		      gimple_set_vdef (new_stmt, gimple_vdef (stmt));
- 		      if (gimple_vdef (new_stmt)
- 			  && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME)
- 			SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt;
- 		      if (!lhs)
- 			{
- 			  gsi_replace (gsi, new_stmt, false);
- 			  return true;
- 			}
  		      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
- 		      goto done;
  		    }
  		}
  	    }
  	}
--- 721,756 ----
  		  tree tem = fold_const_aggregate_ref (srcmem);
  		  if (tem)
  		    srcmem = tem;
! 		  gimple *new_stmt;
! 		  if (is_gimple_reg_type (TREE_TYPE (srcmem)))
  		    {
! 		      new_stmt = gimple_build_assign (NULL_TREE, srcmem);
! 		      if (gimple_in_ssa_p (cfun))
! 			srcmem = make_ssa_name (TREE_TYPE (srcmem),
! 						new_stmt);
! 		      else
! 			srcmem = create_tmp_reg (TREE_TYPE (srcmem));
! 		      gimple_assign_set_lhs (new_stmt, srcmem);
  		      gimple_set_vuse (new_stmt, gimple_vuse (stmt));
  		      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
  		    }
+ 		  if (dest_align < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
+ 		    desttype = build_aligned_type (type, dest_align);
+ 		  new_stmt
+ 		    = gimple_build_assign (fold_build2 (MEM_REF, desttype,
+ 							dest, off0), srcmem);
+ 		  gimple_set_vuse (new_stmt, gimple_vuse (stmt));
+ 		  gimple_set_vdef (new_stmt, gimple_vdef (stmt));
+ 		  if (gimple_vdef (new_stmt)
+ 		      && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME)
+ 		    SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt;
+ 		  if (!lhs)
+ 		    {
+ 		      gsi_replace (gsi, new_stmt, false);
+ 		      return true;
+ 		    }
+ 		  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
+ 		  goto done;
  		}
  	    }
  	}
Index: gcc/testsuite/gcc.dg/torture/builtin-memcpy.c
===================================================================
*** gcc/testsuite/gcc.dg/torture/builtin-memcpy.c	(revision 0)
--- gcc/testsuite/gcc.dg/torture/builtin-memcpy.c	(working copy)
***************
*** 0 ****
--- 1,89 ----
+ /* { dg-do run } */
+ 
+ char src[32], dest[32];
+ long long llsrc, lldest;
+ 
+ /* Unaligned source/dest variants.  */
+ void __attribute__((noinline,noclone))
+ copyuu16 (long long *src, long long *dest)
+ {
+   __builtin_memcpy (dest, src, 16);
+ }
+ void __attribute__((noinline,noclone))
+ copyuu8 (long *src, long *dest)
+ {
+   __builtin_memcpy (dest, src, 8);
+ }
+ void __attribute__((noinline,noclone))
+ copyuu4 (int *src, int *dest)
+ {
+   __builtin_memcpy (dest, src, 4);
+ }
+ void __attribute__((noinline,noclone))
+ copyuu2 (short *src, short *dest)
+ {
+   __builtin_memcpy (dest, src, 2);
+ }
+ 
+ /* Aligned source, unaligned dest variants.  */
+ void __attribute__((noinline,noclone))
+ copyau16 (long long *dest)
+ {
+   __builtin_memcpy (dest, &llsrc, 16);
+ }
+ void __attribute__((noinline,noclone))
+ copyau8 (long long *dest)
+ {
+   __builtin_memcpy (dest, &llsrc, 8);
+ }
+ void __attribute__((noinline,noclone))
+ copyau4 (long long *dest)
+ {
+   __builtin_memcpy (dest, &llsrc, 4);
+ }
+ void __attribute__((noinline,noclone))
+ copyau2 (long long *dest)
+ {
+   __builtin_memcpy (dest, &llsrc, 2);
+ }
+ 
+ /* Unaligned source, aligned dest variants.  */
+ void __attribute__((noinline,noclone))
+ copyua16 (long long *src)
+ {
+   __builtin_memcpy (&lldest, src, 16);
+ }
+ void __attribute__((noinline,noclone))
+ copyua8 (long *src)
+ {
+   __builtin_memcpy (&lldest, src, 8);
+ }
+ void __attribute__((noinline,noclone))
+ copyua4 (int *src)
+ {
+   __builtin_memcpy (&lldest, src, 4);
+ }
+ void __attribute__((noinline,noclone))
+ copyua2 (short *src)
+ {
+   __builtin_memcpy (&lldest, src, 2);
+ }
+ 
+ int main()
+ {
+   void *usrc = (void *)(((__UINTPTR_TYPE__)src & -16) + 17);
+   void *udest = (void *)(((__UINTPTR_TYPE__)dest & -16) + 17);
+   copyuu16 (udest, usrc);
+   copyuu8 (udest, usrc);
+   copyuu4 (udest, usrc);
+   copyuu2 (udest, usrc);
+   copyau16 (usrc);
+   copyau8 (usrc);
+   copyau4 (usrc);
+   copyau2 (usrc);
+   copyua16 (udest);
+   copyua8 (udest);
+   copyua4 (udest);
+   copyua2 (udest);
+   return 0;
+ }

