This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

RFA: tweak integer type used for memcpy folding


wide-int fails to build libitm because of a bad interaction between:

/* Keep the OI and XI modes from confusing the compiler into thinking
   that these modes could actually be used for computation.  They are
   only holders for vectors during data movement.  */
#define MAX_BITSIZE_MODE_ANY_INT (128)

and the memcpy folding code:

      /* Make sure we are not copying using a floating-point mode or
         a type whose size possibly does not match its precision.  */
      if (FLOAT_MODE_P (TYPE_MODE (desttype))
	  || TREE_CODE (desttype) == BOOLEAN_TYPE
	  || TREE_CODE (desttype) == ENUMERAL_TYPE)
	{
	  /* A more suitable int_mode_for_mode would return a vector
	     integer mode for a vector float mode or a integer complex
	     mode for a float complex mode if there isn't a regular
	     integer mode covering the mode of desttype.  */
	  enum machine_mode mode = int_mode_for_mode (TYPE_MODE (desttype));
	  if (mode == BLKmode)
	    desttype = NULL_TREE;
	  else
	    desttype = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode),
						       1);
	}
      if (FLOAT_MODE_P (TYPE_MODE (srctype))
	  || TREE_CODE (srctype) == BOOLEAN_TYPE
	  || TREE_CODE (srctype) == ENUMERAL_TYPE)
	{
	  enum machine_mode mode = int_mode_for_mode (TYPE_MODE (srctype));
	  if (mode == BLKmode)
	    srctype = NULL_TREE;
	  else
	    srctype = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode),
						      1);
	}

The failure occurs for complex long double, which we try to copy as
a 256-bit integer type (OImode).

This patch tries to do what the comment suggests by introducing a new
form of int_mode_for_mode that replaces vector modes with vector modes
and complex modes with complex modes.  The fallback case of using a
MODE_INT is limited by MAX_FIXED_MODE_SIZE, so can never go above
128 bits on x86_64.

The question then is what to do about 128-bit types for i386.
MAX_FIXED_MODE_SIZE is 64 there, which says that int128_t shouldn't be
used for optimisation.  However, gcc.target/i386/pr49168-1.c only passes
for -m32 -msse2 because we use int128_t to copy a float128_t.

I handled that by allowing MODE_VECTOR_INT to be used instead of
MODE_INT if the mode size is greater than MAX_FIXED_MODE_SIZE,
even if the original type wasn't a vector.

It might be that other callers to int_mode_for_mode should use
the new function too, but I'll look at that separately.

I used the attached testcase (with printfs added to gcc) to check that
the right modes and types were being chosen.  The patch fixes the
complex float and complex double cases, since the integer type that we
previously picked had a larger alignment than the original complex type.

One possibly subtle side-effect of FLOAT_MODE_P (TYPE_MODE (desttype))
is that vectors are copied as integer vectors if the target supports
them directly but are copied as float vectors otherwise, since in the
latter case the mode will be BLKmode.  E.g. the 1024-bit vectors in the
test are copied as vector floats and vector doubles both before and
after the patch.

Tested against trunk with x86_64-linux-gnu {,-m32}.  OK to install?

Thanks,
Richard


gcc/
	* machmode.h (bitwise_mode_for_size): Declare.
	* stor-layout.h (bitwise_type_for_mode): Likewise.
	* stor-layout.c (bitwise_mode_for_size): New function.
	(bitwise_type_for_mode): Likewise.
	* builtins.c (fold_builtin_memory_op): Use it instead of
	int_mode_for_mode and build_nonstandard_integer_type.

gcc/testsuite/
	* gcc.dg/memcpy-5.c: New test.

Index: gcc/machmode.h
===================================================================
--- gcc/machmode.h	2014-04-18 11:16:12.706092658 +0100
+++ gcc/machmode.h	2014-04-18 11:16:38.179299261 +0100
@@ -253,6 +253,8 @@ extern enum machine_mode smallest_mode_f
 
 extern enum machine_mode int_mode_for_mode (enum machine_mode);
 
+extern enum machine_mode bitwise_mode_for_size (enum machine_mode);
+
 /* Return a mode that is suitable for representing a vector,
    or BLKmode on failure.  */
 
Index: gcc/stor-layout.h
===================================================================
--- gcc/stor-layout.h	2014-04-18 11:16:12.707092667 +0100
+++ gcc/stor-layout.h	2014-04-18 11:16:12.830093665 +0100
@@ -98,6 +98,8 @@ extern tree make_unsigned_type (int);
    mode_for_size, but is passed a tree.  */
 extern enum machine_mode mode_for_size_tree (const_tree, enum mode_class, int);
 
+extern tree bitwise_type_for_mode (enum machine_mode);
+
 /* Given a VAR_DECL, PARM_DECL or RESULT_DECL, clears the results of
    a previous call to layout_decl and calls it again.  */
 extern void relayout_decl (tree);
Index: gcc/stor-layout.c
===================================================================
--- gcc/stor-layout.c	2014-04-18 11:16:12.707092667 +0100
+++ gcc/stor-layout.c	2014-04-18 16:08:20.033752312 +0100
@@ -403,6 +403,73 @@ int_mode_for_mode (enum machine_mode mod
   return mode;
 }
 
+/* Find a mode that can be used for efficient bitwise operations on MODE.
+   Return BLKmode if no such mode exists.  */
+
+enum machine_mode
+bitwise_mode_for_mode (enum machine_mode mode)
+{
+  /* Quick exit if we already have a suitable mode.  */
+  unsigned int bitsize = GET_MODE_BITSIZE (mode);
+  if (SCALAR_INT_MODE_P (mode) && bitsize <= MAX_FIXED_MODE_SIZE)
+    return mode;
+
+  /* Reuse the sanity checks from int_mode_for_mode.  */
+  gcc_checking_assert ((int_mode_for_mode (mode), true));
+
+  /* Try to replace complex modes with complex modes.  In general we
+     expect both components to be processed independently, so we only
+     care whether there is a register for the inner mode.  */
+  if (COMPLEX_MODE_P (mode))
+    {
+      enum machine_mode trial = mode;
+      if (GET_MODE_CLASS (mode) != MODE_COMPLEX_INT)
+	trial = mode_for_size (bitsize, MODE_COMPLEX_INT, false);
+      if (trial != BLKmode
+	  && have_regs_of_mode[GET_MODE_INNER (trial)])
+	return trial;
+    }
+
+  /* Try to replace vector modes with vector modes.  Also try using vector
+     modes if an integer mode would be too big.  */
+  if (VECTOR_MODE_P (mode) || bitsize > MAX_FIXED_MODE_SIZE)
+    {
+      enum machine_mode trial = mode;
+      if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
+	trial = mode_for_size (bitsize, MODE_VECTOR_INT, 0);
+      if (trial != BLKmode
+	  && have_regs_of_mode[trial]
+	  && targetm.vector_mode_supported_p (trial))
+	return trial;
+    }
+
+  /* Otherwise fall back on integers while honoring MAX_FIXED_MODE_SIZE.  */
+  return mode_for_size (bitsize, MODE_INT, true);
+}
+
+/* Find a type that can be used for efficient bitwise operations on MODE.
+   Return null if no such mode exists.  */
+
+tree
+bitwise_type_for_mode (enum machine_mode mode)
+{
+  mode = bitwise_mode_for_mode (mode);
+  if (mode == BLKmode)
+    return NULL_TREE;
+
+  unsigned int inner_size = GET_MODE_UNIT_BITSIZE (mode);
+  tree inner_type = build_nonstandard_integer_type (inner_size, true);
+
+  if (VECTOR_MODE_P (mode))
+    return build_vector_type_for_mode (inner_type, mode);
+
+  if (COMPLEX_MODE_P (mode))
+    return build_complex_type (inner_type);
+
+  gcc_checking_assert (GET_MODE_INNER (mode) == VOIDmode);
+  return inner_type;
+}
+
 /* Find a mode that is suitable for representing a vector with
    NUNITS elements of mode INNERMODE.  Returns BLKmode if there
    is no suitable mode.  */
Index: gcc/builtins.c
===================================================================
--- gcc/builtins.c	2014-04-18 11:16:12.676092416 +0100
+++ gcc/builtins.c	2014-04-18 11:22:50.107315234 +0100
@@ -8921,29 +8921,11 @@ fold_builtin_memory_op (location_t loc,
       if (FLOAT_MODE_P (TYPE_MODE (desttype))
 	  || TREE_CODE (desttype) == BOOLEAN_TYPE
 	  || TREE_CODE (desttype) == ENUMERAL_TYPE)
-	{
-	  /* A more suitable int_mode_for_mode would return a vector
-	     integer mode for a vector float mode or a integer complex
-	     mode for a float complex mode if there isn't a regular
-	     integer mode covering the mode of desttype.  */
-	  enum machine_mode mode = int_mode_for_mode (TYPE_MODE (desttype));
-	  if (mode == BLKmode)
-	    desttype = NULL_TREE;
-	  else
-	    desttype = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode),
-						       1);
-	}
+	desttype = bitwise_type_for_mode (TYPE_MODE (desttype));
       if (FLOAT_MODE_P (TYPE_MODE (srctype))
 	  || TREE_CODE (srctype) == BOOLEAN_TYPE
 	  || TREE_CODE (srctype) == ENUMERAL_TYPE)
-	{
-	  enum machine_mode mode = int_mode_for_mode (TYPE_MODE (srctype));
-	  if (mode == BLKmode)
-	    srctype = NULL_TREE;
-	  else
-	    srctype = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode),
-						      1);
-	}
+	srctype = bitwise_type_for_mode (TYPE_MODE (srctype));
       if (!srctype)
 	srctype = desttype;
       if (!desttype)
Index: gcc/testsuite/gcc.dg/memcpy-5.c
===================================================================
--- /dev/null	2014-04-15 08:10:27.294524132 +0100
+++ gcc/testsuite/gcc.dg/memcpy-5.c	2014-04-18 11:21:52.036844396 +0100
@@ -0,0 +1,27 @@
+/* { dg-options "-O -fdump-tree-optimized" } */
+
+extern void *memcpy (void *, const void *, __SIZE_TYPE__);
+
+#define TEST(NAME, TYPE) \
+  TYPE NAME##x; \
+  char NAME##y[sizeof (NAME##x)] __attribute__((aligned (__alignof__ (NAME##x)))); \
+  void NAME (void) { memcpy (&NAME##x, &NAME##y, sizeof (NAME##x)); }
+
+TEST (f, float);
+TEST (d, double);
+TEST (ld, long double);
+TEST (cf, _Complex float);
+TEST (cd, _Complex double);
+TEST (cld, _Complex long double);
+TEST (d8f, float __attribute__((vector_size (8))));
+TEST (d16f, float __attribute__((vector_size (16))));
+TEST (d32f, float __attribute__((vector_size (32))));
+TEST (d64f, float __attribute__((vector_size (64))));
+TEST (d128f, float __attribute__((vector_size (128))));
+TEST (d16d, double __attribute__((vector_size (16))));
+TEST (d32d, double __attribute__((vector_size (32))));
+TEST (d64d, double __attribute__((vector_size (64))));
+TEST (d128d, double __attribute__((vector_size (128))));
+
+/* { dg-final { scan-tree-dump-not "memcpy" "optimized" { target x86_64-*-* } } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]