RFA: tweak integer type used for memcpy folding
Richard Sandiford
rdsandiford@googlemail.com
Tue Apr 22 07:39:00 GMT 2014
Richard Sandiford <rdsandiford@googlemail.com> writes:
> wide-int fails to build libitm because of a bad interaction between:
>
> /* Keep the OI and XI modes from confusing the compiler into thinking
> that these modes could actually be used for computation. They are
> only holders for vectors during data movement. */
> #define MAX_BITSIZE_MODE_ANY_INT (128)
>
> and the memcpy folding code:
>
> /* Make sure we are not copying using a floating-point mode or
> a type whose size possibly does not match its precision. */
> if (FLOAT_MODE_P (TYPE_MODE (desttype))
> || TREE_CODE (desttype) == BOOLEAN_TYPE
> || TREE_CODE (desttype) == ENUMERAL_TYPE)
> {
> /* A more suitable int_mode_for_mode would return a vector
> integer mode for a vector float mode or a integer complex
> mode for a float complex mode if there isn't a regular
> integer mode covering the mode of desttype. */
> enum machine_mode mode = int_mode_for_mode (TYPE_MODE (desttype));
> if (mode == BLKmode)
> desttype = NULL_TREE;
> else
> desttype = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode),
> 1);
> }
> if (FLOAT_MODE_P (TYPE_MODE (srctype))
> || TREE_CODE (srctype) == BOOLEAN_TYPE
> || TREE_CODE (srctype) == ENUMERAL_TYPE)
> {
> enum machine_mode mode = int_mode_for_mode (TYPE_MODE (srctype));
> if (mode == BLKmode)
> srctype = NULL_TREE;
> else
> srctype = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode),
> 1);
> }
>
> The failure occurs for complex long double, which we try to copy as
> a 256-bit integer type (OImode).
>
> This patch tries to do what the comment suggests by introducing a new
> form of int_mode_for_mode that replaces vector modes with integer vector modes
> and complex modes with integer complex modes. The fallback case of using a
> MODE_INT is limited by MAX_FIXED_MODE_SIZE, so can never go above
> 128 bits on x86_64.
>
> The question then is what to do about 128-bit types for i386.
> MAX_FIXED_MODE_SIZE is 64 there, which says that int128_t shouldn't be
> used for optimisation. However, gcc.target/i386/pr49168-1.c only passes
> for -m32 -msse2 because we use int128_t to copy a float128_t.
>
> I handled that by allowing MODE_VECTOR_INT to be used instead of
> MODE_INT if the mode size is greater than MAX_FIXED_MODE_SIZE,
> even if the original type wasn't a vector.
>
> It might be that other callers to int_mode_for_mode should use
> the new function too, but I'll look at that separately.
>
> I used the attached testcase (with printfs added to gcc) to check that
> the right modes and types were being chosen. The patch fixes the
> complex float and complex double cases, since the integer type that we
> previously picked had a larger alignment than the original complex type.
>
> One possibly subtle side-effect of FLOAT_MODE_P (TYPE_MODE (desttype))
> is that vectors are copied as integer vectors if the target supports
> them directly but are copied as float vectors otherwise, since in the
> latter case the mode will be BLKmode. E.g. the 1024-bit vectors in the
> test are copied as vector floats and vector doubles both before and
> after the patch.
>
> Tested against trunk with x86_64-linux-gnu {,-m32}. OK to install?
There was a typo in the declaration of the mode->mode function; it
should have been as follows.
Thanks,
Richard
gcc/
* machmode.h (bitwise_mode_for_mode): Declare.
* stor-layout.h (bitwise_type_for_mode): Likewise.
* stor-layout.c (bitwise_mode_for_mode): New function.
(bitwise_type_for_mode): Likewise.
* builtins.c (fold_builtin_memory_op): Use it instead of
int_mode_for_mode and build_nonstandard_integer_type.
gcc/testsuite/
* gcc.dg/memcpy-5.c: New test.
Index: gcc/machmode.h
===================================================================
--- gcc/machmode.h 2014-04-21 10:35:17.611603989 +0100
+++ gcc/machmode.h 2014-04-21 13:58:59.403884452 +0100
@@ -253,6 +253,8 @@ extern enum machine_mode smallest_mode_f
extern enum machine_mode int_mode_for_mode (enum machine_mode);
+extern enum machine_mode bitwise_mode_for_mode (enum machine_mode);
+
/* Return a mode that is suitable for representing a vector,
or BLKmode on failure. */
Index: gcc/stor-layout.h
===================================================================
--- gcc/stor-layout.h 2014-04-21 10:35:17.611603989 +0100
+++ gcc/stor-layout.h 2014-04-21 13:58:59.405878960 +0100
@@ -98,6 +98,8 @@ extern tree make_unsigned_type (int);
mode_for_size, but is passed a tree. */
extern enum machine_mode mode_for_size_tree (const_tree, enum mode_class, int);
+extern tree bitwise_type_for_mode (enum machine_mode);
+
/* Given a VAR_DECL, PARM_DECL or RESULT_DECL, clears the results of
a previous call to layout_decl and calls it again. */
extern void relayout_decl (tree);
Index: gcc/stor-layout.c
===================================================================
--- gcc/stor-layout.c 2014-04-21 10:35:17.611603989 +0100
+++ gcc/stor-layout.c 2014-04-21 13:58:59.404881706 +0100
@@ -403,6 +403,73 @@ int_mode_for_mode (enum machine_mode mod
return mode;
}
+/* Find a mode that can be used for efficient bitwise operations on MODE.
+ Return BLKmode if no such mode exists. */
+
+enum machine_mode
+bitwise_mode_for_mode (enum machine_mode mode)
+{
+ /* Quick exit if we already have a suitable mode. */
+ unsigned int bitsize = GET_MODE_BITSIZE (mode);
+ if (SCALAR_INT_MODE_P (mode) && bitsize <= MAX_FIXED_MODE_SIZE)
+ return mode;
+
+ /* Reuse the sanity checks from int_mode_for_mode. */
+ gcc_checking_assert ((int_mode_for_mode (mode), true));
+
+ /* Try to replace complex modes with complex modes. In general we
+ expect both components to be processed independently, so we only
+ care whether there is a register for the inner mode. */
+ if (COMPLEX_MODE_P (mode))
+ {
+ enum machine_mode trial = mode;
+ if (GET_MODE_CLASS (mode) != MODE_COMPLEX_INT)
+ trial = mode_for_size (bitsize, MODE_COMPLEX_INT, false);
+ if (trial != BLKmode
+ && have_regs_of_mode[GET_MODE_INNER (trial)])
+ return trial;
+ }
+
+ /* Try to replace vector modes with vector modes. Also try using vector
+ modes if an integer mode would be too big. */
+ if (VECTOR_MODE_P (mode) || bitsize > MAX_FIXED_MODE_SIZE)
+ {
+ enum machine_mode trial = mode;
+ if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
+ trial = mode_for_size (bitsize, MODE_VECTOR_INT, 0);
+ if (trial != BLKmode
+ && have_regs_of_mode[trial]
+ && targetm.vector_mode_supported_p (trial))
+ return trial;
+ }
+
+ /* Otherwise fall back on integers while honoring MAX_FIXED_MODE_SIZE. */
+ return mode_for_size (bitsize, MODE_INT, true);
+}
+
+/* Find a type that can be used for efficient bitwise operations on MODE.
+ Return null if no such mode exists. */
+
+tree
+bitwise_type_for_mode (enum machine_mode mode)
+{
+ mode = bitwise_mode_for_mode (mode);
+ if (mode == BLKmode)
+ return NULL_TREE;
+
+ unsigned int inner_size = GET_MODE_UNIT_BITSIZE (mode);
+ tree inner_type = build_nonstandard_integer_type (inner_size, true);
+
+ if (VECTOR_MODE_P (mode))
+ return build_vector_type_for_mode (inner_type, mode);
+
+ if (COMPLEX_MODE_P (mode))
+ return build_complex_type (inner_type);
+
+ gcc_checking_assert (GET_MODE_INNER (mode) == VOIDmode);
+ return inner_type;
+}
+
/* Find a mode that is suitable for representing a vector with
NUNITS elements of mode INNERMODE. Returns BLKmode if there
is no suitable mode. */
Index: gcc/builtins.c
===================================================================
--- gcc/builtins.c 2014-04-21 10:35:17.611603989 +0100
+++ gcc/builtins.c 2014-04-21 13:58:59.390920150 +0100
@@ -8921,29 +8921,11 @@ fold_builtin_memory_op (location_t loc,
if (FLOAT_MODE_P (TYPE_MODE (desttype))
|| TREE_CODE (desttype) == BOOLEAN_TYPE
|| TREE_CODE (desttype) == ENUMERAL_TYPE)
- {
- /* A more suitable int_mode_for_mode would return a vector
- integer mode for a vector float mode or a integer complex
- mode for a float complex mode if there isn't a regular
- integer mode covering the mode of desttype. */
- enum machine_mode mode = int_mode_for_mode (TYPE_MODE (desttype));
- if (mode == BLKmode)
- desttype = NULL_TREE;
- else
- desttype = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode),
- 1);
- }
+ desttype = bitwise_type_for_mode (TYPE_MODE (desttype));
if (FLOAT_MODE_P (TYPE_MODE (srctype))
|| TREE_CODE (srctype) == BOOLEAN_TYPE
|| TREE_CODE (srctype) == ENUMERAL_TYPE)
- {
- enum machine_mode mode = int_mode_for_mode (TYPE_MODE (srctype));
- if (mode == BLKmode)
- srctype = NULL_TREE;
- else
- srctype = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode),
- 1);
- }
+ srctype = bitwise_type_for_mode (TYPE_MODE (srctype));
if (!srctype)
srctype = desttype;
if (!desttype)
Index: gcc/testsuite/gcc.dg/memcpy-5.c
===================================================================
--- /dev/null 2014-04-15 08:10:27.294524132 +0100
+++ gcc/testsuite/gcc.dg/memcpy-5.c 2014-04-21 13:58:59.469703199 +0100
@@ -0,0 +1,27 @@
+/* { dg-options "-O -fdump-tree-optimized" } */
+
+extern void *memcpy (void *, const void *, __SIZE_TYPE__);
+
+#define TEST(NAME, TYPE) \
+ TYPE NAME##x; \
+ char NAME##y[sizeof (NAME##x)] __attribute__((aligned (__alignof__ (NAME##x)))); \
+ void NAME (void) { memcpy (&NAME##x, &NAME##y, sizeof (NAME##x)); }
+
+TEST (f, float);
+TEST (d, double);
+TEST (ld, long double);
+TEST (cf, _Complex float);
+TEST (cd, _Complex double);
+TEST (cld, _Complex long double);
+TEST (d8f, float __attribute__((vector_size (8))));
+TEST (d16f, float __attribute__((vector_size (16))));
+TEST (d32f, float __attribute__((vector_size (32))));
+TEST (d64f, float __attribute__((vector_size (64))));
+TEST (d128f, float __attribute__((vector_size (128))));
+TEST (d16d, double __attribute__((vector_size (16))));
+TEST (d32d, double __attribute__((vector_size (32))));
+TEST (d64d, double __attribute__((vector_size (64))));
+TEST (d128d, double __attribute__((vector_size (128))));
+
+/* { dg-final { scan-tree-dump-not "memcpy" "optimized" { target x86_64-*-* } } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
More information about the Gcc-patches
mailing list