This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
PATCH: middle-end/58981: movmem/setmem use mode wider than Pmode for size
- From: "H.J. Lu" <hongjiu dot lu at intel dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: law at redhat dot com, rguenther at suse dot de
- Date: Mon, 4 Nov 2013 02:32:16 -0800
- Subject: PATCH: middle-end/58981: movmem/setmem use mode wider than Pmode for size
- Authentication-results: sourceware.org; auth=none
- Reply-to: "H.J. Lu" <hjl dot tools at gmail dot com>
emit_block_move_via_movmem and set_storage_via_setmem have
for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
mode = GET_MODE_WIDER_MODE (mode))
{
enum insn_code code = direct_optab_handler (movmem_optab, mode);
if (code != CODE_FOR_nothing
/* We don't need MODE to be narrower than BITS_PER_HOST_WIDE_INT
here because if SIZE is less than the mode mask, as it is
returned by the macro, it will definitely be less than the
actual mode mask. */
&& ((CONST_INT_P (size)
&& ((unsigned HOST_WIDE_INT) INTVAL (size)
<= (GET_MODE_MASK (mode) >> 1)))
|| GET_MODE_BITSIZE (mode) >= BITS_PER_WORD))
{
Backend may assume mode of size in movmem and setmem expanders is no
widder than Pmode since size is within the Pmode address space. X86
backend expand_set_or_movmem_prologue_epilogue_by_misaligned has
rtx saveddest = *destptr;
gcc_assert (desired_align <= size);
/* Align destptr up, place it to new register. */
*destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
GEN_INT (prolog_size),
NULL_RTX, 1, OPTAB_DIRECT);
*destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
GEN_INT (-desired_align),
*destptr, 1, OPTAB_DIRECT);
/* See how many bytes we skipped. */
saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
*destptr,
saveddest, 1, OPTAB_DIRECT);
/* Adjust srcptr and count. */
if (!issetmem)
*srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
*srcptr, 1, OPTAB_DIRECT);
*count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
saveddest, *count, 1, OPTAB_DIRECT);
saveddest is a negative number in Pmode and *count is in word_mode. For
x32, when Pmode is SImode and word_mode is DImode, saveddest + *count
leads to overflow. We could fix it by using mode of saveddest to compute
saveddest + *count. But it leads to extra conversions and other backends
may run into the same problem. A better fix is to limit mode of size in
movmem and setmem expanders to Pmode. It generates better and correct
memcpy and memset for x32.
There is also a typo in comments. It should be BITS_PER_WORD, not
BITS_PER_HOST_WIDE_INT.
Tested on x32. OK to install?
Thanks.
H.J.
---
gcc/
2013-11-04 H.J. Lu <hongjiu.lu@intel.com>
PR middle-end/58981
* expr.c (emit_block_move_via_movmem): Don't use mode wider than
Pmode for size.
(set_storage_via_setmem): Likewise.
gcc/testsuite/
2013-11-04 H.J. Lu <hongjiu.lu@intel.com>
PR middle-end/58981
* gcc.dg/pr58981.c: New test.
diff --git a/gcc/expr.c b/gcc/expr.c
index 551a660..1a869650 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -1294,13 +1294,16 @@ emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align,
enum insn_code code = direct_optab_handler (movmem_optab, mode);
if (code != CODE_FOR_nothing
- /* We don't need MODE to be narrower than BITS_PER_HOST_WIDE_INT
- here because if SIZE is less than the mode mask, as it is
+ /* We don't need MODE to be narrower than BITS_PER_WORD here
+ because if SIZE is less than the mode mask, as it is
returned by the macro, it will definitely be less than the
- actual mode mask. */
+ actual mode mask unless MODE is wider than Pmode. Since
+ SIZE is within the Pmode address space, we should use
+ Pmode in this case. */
&& ((CONST_INT_P (size)
&& ((unsigned HOST_WIDE_INT) INTVAL (size)
<= (GET_MODE_MASK (mode) >> 1)))
+ || GET_MODE_BITSIZE (mode) >= GET_MODE_BITSIZE (Pmode)
|| GET_MODE_BITSIZE (mode) >= BITS_PER_WORD))
{
struct expand_operand ops[6];
@@ -2879,13 +2882,16 @@ set_storage_via_setmem (rtx object, rtx size, rtx val, unsigned int align,
enum insn_code code = direct_optab_handler (setmem_optab, mode);
if (code != CODE_FOR_nothing
- /* We don't need MODE to be narrower than
- BITS_PER_HOST_WIDE_INT here because if SIZE is less than
- the mode mask, as it is returned by the macro, it will
- definitely be less than the actual mode mask. */
+ /* We don't need MODE to be narrower than BITS_PER_WORD here
+ because if SIZE is less than the mode mask, as it is
+ returned by the macro, it will definitely be less than the
+ actual mode mask unless MODE is wider than Pmode. Since
+ SIZE is within the Pmode address space, we should use
+ Pmode in this case. */
&& ((CONST_INT_P (size)
&& ((unsigned HOST_WIDE_INT) INTVAL (size)
<= (GET_MODE_MASK (mode) >> 1)))
+ || GET_MODE_BITSIZE (mode) >= GET_MODE_BITSIZE (Pmode)
|| GET_MODE_BITSIZE (mode) >= BITS_PER_WORD))
{
struct expand_operand ops[6];
diff --git a/gcc/testsuite/gcc.dg/pr58981.c b/gcc/testsuite/gcc.dg/pr58981.c
new file mode 100644
index 0000000..1c8293e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr58981.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-minline-all-stringops" { target { i?86-*-* x86_64-*-* } } } */
+
+extern void abort (void);
+
+#define MAX_OFFSET (sizeof (long long))
+#define MAX_COPY (8 * sizeof (long long))
+#define MAX_EXTRA (sizeof (long long))
+
+#define MAX_LENGTH (MAX_OFFSET + MAX_COPY + MAX_EXTRA)
+
+static union {
+ char buf[MAX_LENGTH];
+ long long align_int;
+ long double align_fp;
+} u;
+
+char A[MAX_LENGTH];
+
+int
+main ()
+{
+ int off, len, i;
+ char *p, *q;
+
+ for (i = 0; i < MAX_LENGTH; i++)
+ A[i] = 'A';
+
+ for (off = 0; off < MAX_OFFSET; off++)
+ for (len = 1; len < MAX_COPY; len++)
+ {
+ for (i = 0; i < MAX_LENGTH; i++)
+ u.buf[i] = 'a';
+
+ p = __builtin_memcpy (u.buf + off, A, len);
+ if (p != u.buf + off)
+ abort ();
+
+ q = u.buf;
+ for (i = 0; i < off; i++, q++)
+ if (*q != 'a')
+ abort ();
+
+ for (i = 0; i < len; i++, q++)
+ if (*q != 'A')
+ abort ();
+
+ for (i = 0; i < MAX_EXTRA; i++, q++)
+ if (*q != 'a')
+ abort ();
+ }
+
+ return 0;
+}