RFA: tweak integer type used for memcpy folding

Richard Sandiford rdsandiford@googlemail.com
Tue Apr 22 07:39:00 GMT 2014


Richard Sandiford <rdsandiford@googlemail.com> writes:
> wide-int fails to build libitm because of a bad interaction between:
>
> /* Keep the OI and XI modes from confusing the compiler into thinking
>    that these modes could actually be used for computation.  They are
>    only holders for vectors during data movement.  */
> #define MAX_BITSIZE_MODE_ANY_INT (128)
>
> and the memcpy folding code:
>
>       /* Make sure we are not copying using a floating-point mode or
>          a type whose size possibly does not match its precision.  */
>       if (FLOAT_MODE_P (TYPE_MODE (desttype))
> 	  || TREE_CODE (desttype) == BOOLEAN_TYPE
> 	  || TREE_CODE (desttype) == ENUMERAL_TYPE)
> 	{
> 	  /* A more suitable int_mode_for_mode would return a vector
> 	     integer mode for a vector float mode or a integer complex
> 	     mode for a float complex mode if there isn't a regular
> 	     integer mode covering the mode of desttype.  */
> 	  enum machine_mode mode = int_mode_for_mode (TYPE_MODE (desttype));
> 	  if (mode == BLKmode)
> 	    desttype = NULL_TREE;
> 	  else
> 	    desttype = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode),
> 						       1);
> 	}
>       if (FLOAT_MODE_P (TYPE_MODE (srctype))
> 	  || TREE_CODE (srctype) == BOOLEAN_TYPE
> 	  || TREE_CODE (srctype) == ENUMERAL_TYPE)
> 	{
> 	  enum machine_mode mode = int_mode_for_mode (TYPE_MODE (srctype));
> 	  if (mode == BLKmode)
> 	    srctype = NULL_TREE;
> 	  else
> 	    srctype = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode),
> 						      1);
> 	}
>
> The failure occurs for complex long double, which we try to copy as
> a 256-bit integer type (OImode).
>
> This patch tries to do what the comment suggests by introducing a new
> form of int_mode_for_mode that replaces vector modes with integer
> vector modes and complex modes with integer complex modes.  The
> fallback case of using a MODE_INT is limited by MAX_FIXED_MODE_SIZE,
> so can never go above 128 bits on x86_64.
>
> The question then is what to do about 128-bit types for i386.
> MAX_FIXED_MODE_SIZE is 64 there, which says that int128_t shouldn't be
> used for optimisation.  However, gcc.target/i386/pr49168-1.c only passes
> for -m32 -msse2 because we use int128_t to copy a float128_t.
>
> I handled that by allowing MODE_VECTOR_INT to be used instead of
> MODE_INT if the mode size is greater than MAX_FIXED_MODE_SIZE,
> even if the original type wasn't a vector.
>
> It might be that other callers to int_mode_for_mode should use
> the new function too, but I'll look at that separately.
>
> I used the attached testcase (with printfs added to gcc) to check that
> the right modes and types were being chosen.  The patch fixes the
> complex float and complex double cases, since the integer type that we
> previously picked had a larger alignment than the original complex type.
>
> One possibly subtle side-effect of FLOAT_MODE_P (TYPE_MODE (desttype))
> is that vectors are copied as integer vectors if the target supports
> them directly but are copied as float vectors otherwise, since in the
> latter case the mode will be BLKmode.  E.g. the 1024-bit vectors in the
> test are copied as vector floats and vector doubles both before and
> after the patch.
>
> Tested against trunk with x86_64-linux-gnu {,-m32}.  OK to install?

There was a typo in the declaration of the mode->mode function
(bitwise_mode_for_mode); it should have been as follows.

Thanks,
Richard


gcc/
	* machmode.h (bitwise_mode_for_mode): Declare.
	* stor-layout.h (bitwise_type_for_mode): Likewise.
	* stor-layout.c (bitwise_mode_for_mode): New function.
	(bitwise_type_for_mode): Likewise.
	* builtins.c (fold_builtin_memory_op): Use it instead of
	int_mode_for_mode and build_nonstandard_integer_type.

gcc/testsuite/
	* gcc.dg/memcpy-5.c: New test.

Index: gcc/machmode.h
===================================================================
--- gcc/machmode.h	2014-04-21 10:35:17.611603989 +0100
+++ gcc/machmode.h	2014-04-21 13:58:59.403884452 +0100
@@ -253,6 +253,8 @@ extern enum machine_mode smallest_mode_f
 
 extern enum machine_mode int_mode_for_mode (enum machine_mode);
 
+extern enum machine_mode bitwise_mode_for_mode (enum machine_mode);
+
 /* Return a mode that is suitable for representing a vector,
    or BLKmode on failure.  */
 
Index: gcc/stor-layout.h
===================================================================
--- gcc/stor-layout.h	2014-04-21 10:35:17.611603989 +0100
+++ gcc/stor-layout.h	2014-04-21 13:58:59.405878960 +0100
@@ -98,6 +98,8 @@ extern tree make_unsigned_type (int);
    mode_for_size, but is passed a tree.  */
 extern enum machine_mode mode_for_size_tree (const_tree, enum mode_class, int);
 
+extern tree bitwise_type_for_mode (enum machine_mode);
+
 /* Given a VAR_DECL, PARM_DECL or RESULT_DECL, clears the results of
    a previous call to layout_decl and calls it again.  */
 extern void relayout_decl (tree);
Index: gcc/stor-layout.c
===================================================================
--- gcc/stor-layout.c	2014-04-21 10:35:17.611603989 +0100
+++ gcc/stor-layout.c	2014-04-21 13:58:59.404881706 +0100
@@ -403,6 +403,73 @@ int_mode_for_mode (enum machine_mode mod
   return mode;
 }
 
+/* Find a mode that can be used for efficient bitwise operations on MODE.
+   Return BLKmode if no such mode exists.  */
+
+enum machine_mode
+bitwise_mode_for_mode (enum machine_mode mode)
+{
+  /* Quick exit if we already have a suitable mode.  */
+  unsigned int bitsize = GET_MODE_BITSIZE (mode);
+  if (SCALAR_INT_MODE_P (mode) && bitsize <= MAX_FIXED_MODE_SIZE)
+    return mode;
+
+  /* Reuse the sanity checks from int_mode_for_mode.  */
+  gcc_checking_assert ((int_mode_for_mode (mode), true));
+
+  /* Try to replace complex modes with complex modes.  In general we
+     expect both components to be processed independently, so we only
+     care whether there is a register for the inner mode.  */
+  if (COMPLEX_MODE_P (mode))
+    {
+      enum machine_mode trial = mode;
+      if (GET_MODE_CLASS (mode) != MODE_COMPLEX_INT)
+	trial = mode_for_size (bitsize, MODE_COMPLEX_INT, false);
+      if (trial != BLKmode
+	  && have_regs_of_mode[GET_MODE_INNER (trial)])
+	return trial;
+    }
+
+  /* Try to replace vector modes with vector modes.  Also try using vector
+     modes if an integer mode would be too big.  */
+  if (VECTOR_MODE_P (mode) || bitsize > MAX_FIXED_MODE_SIZE)
+    {
+      enum machine_mode trial = mode;
+      if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
+	trial = mode_for_size (bitsize, MODE_VECTOR_INT, 0);
+      if (trial != BLKmode
+	  && have_regs_of_mode[trial]
+	  && targetm.vector_mode_supported_p (trial))
+	return trial;
+    }
+
+  /* Otherwise fall back on integers while honoring MAX_FIXED_MODE_SIZE.  */
+  return mode_for_size (bitsize, MODE_INT, true);
+}
+
+/* Find a type that can be used for efficient bitwise operations on MODE.
+   Return null if no such mode exists.  */
+
+tree
+bitwise_type_for_mode (enum machine_mode mode)
+{
+  mode = bitwise_mode_for_mode (mode);
+  if (mode == BLKmode)
+    return NULL_TREE;
+
+  unsigned int inner_size = GET_MODE_UNIT_BITSIZE (mode);
+  tree inner_type = build_nonstandard_integer_type (inner_size, true);
+
+  if (VECTOR_MODE_P (mode))
+    return build_vector_type_for_mode (inner_type, mode);
+
+  if (COMPLEX_MODE_P (mode))
+    return build_complex_type (inner_type);
+
+  gcc_checking_assert (GET_MODE_INNER (mode) == VOIDmode);
+  return inner_type;
+}
+
 /* Find a mode that is suitable for representing a vector with
    NUNITS elements of mode INNERMODE.  Returns BLKmode if there
    is no suitable mode.  */
Index: gcc/builtins.c
===================================================================
--- gcc/builtins.c	2014-04-21 10:35:17.611603989 +0100
+++ gcc/builtins.c	2014-04-21 13:58:59.390920150 +0100
@@ -8921,29 +8921,11 @@ fold_builtin_memory_op (location_t loc,
       if (FLOAT_MODE_P (TYPE_MODE (desttype))
 	  || TREE_CODE (desttype) == BOOLEAN_TYPE
 	  || TREE_CODE (desttype) == ENUMERAL_TYPE)
-	{
-	  /* A more suitable int_mode_for_mode would return a vector
-	     integer mode for a vector float mode or a integer complex
-	     mode for a float complex mode if there isn't a regular
-	     integer mode covering the mode of desttype.  */
-	  enum machine_mode mode = int_mode_for_mode (TYPE_MODE (desttype));
-	  if (mode == BLKmode)
-	    desttype = NULL_TREE;
-	  else
-	    desttype = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode),
-						       1);
-	}
+	desttype = bitwise_type_for_mode (TYPE_MODE (desttype));
       if (FLOAT_MODE_P (TYPE_MODE (srctype))
 	  || TREE_CODE (srctype) == BOOLEAN_TYPE
 	  || TREE_CODE (srctype) == ENUMERAL_TYPE)
-	{
-	  enum machine_mode mode = int_mode_for_mode (TYPE_MODE (srctype));
-	  if (mode == BLKmode)
-	    srctype = NULL_TREE;
-	  else
-	    srctype = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode),
-						      1);
-	}
+	srctype = bitwise_type_for_mode (TYPE_MODE (srctype));
       if (!srctype)
 	srctype = desttype;
       if (!desttype)
Index: gcc/testsuite/gcc.dg/memcpy-5.c
===================================================================
--- /dev/null	2014-04-15 08:10:27.294524132 +0100
+++ gcc/testsuite/gcc.dg/memcpy-5.c	2014-04-21 13:58:59.469703199 +0100
@@ -0,0 +1,27 @@
+/* { dg-options "-O -fdump-tree-optimized" } */
+
+extern void *memcpy (void *, const void *, __SIZE_TYPE__);
+
+#define TEST(NAME, TYPE) \
+  TYPE NAME##x; \
+  char NAME##y[sizeof (NAME##x)] __attribute__((aligned (__alignof__ (NAME##x)))); \
+  void NAME (void) { memcpy (&NAME##x, &NAME##y, sizeof (NAME##x)); }
+
+TEST (f, float);
+TEST (d, double);
+TEST (ld, long double);
+TEST (cf, _Complex float);
+TEST (cd, _Complex double);
+TEST (cld, _Complex long double);
+TEST (d8f, float __attribute__((vector_size (8))));
+TEST (d16f, float __attribute__((vector_size (16))));
+TEST (d32f, float __attribute__((vector_size (32))));
+TEST (d64f, float __attribute__((vector_size (64))));
+TEST (d128f, float __attribute__((vector_size (128))));
+TEST (d16d, double __attribute__((vector_size (16))));
+TEST (d32d, double __attribute__((vector_size (32))));
+TEST (d64d, double __attribute__((vector_size (64))));
+TEST (d128d, double __attribute__((vector_size (128))));
+
+/* { dg-final { scan-tree-dump-not "memcpy" "optimized" { target x86_64-*-* } } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */



More information about the Gcc-patches mailing list