This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[RFC PATCH] fold one element {mem{set,cpy,pcpy,move},b{zero,copy}} (PR middle-end/27567)
- From: Jakub Jelinek <jakub at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Wed, 30 Aug 2006 09:48:31 -0400
- Subject: [RFC PATCH] fold one element {mem{set,cpy,pcpy,move},b{zero,copy}} (PR middle-end/27567)
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
The following patch optimizes calls such as
__builtin_memcpy (&a, &b, sizeof (a));
(when a and b have integral, pointer or floating type and
sizeof (a) == sizeof (b)) and
__builtin_memset (&a, 0, sizeof (a));
etc. into assignments during folding, which means that in some cases expand
doesn't need to force the variables into memory.
E.g. on the testcase from the PR
float convert (unsigned int in) { float f; __builtin_memcpy (&f, &in, sizeof (in)); return f; }
the difference with this patch is:
convert:
movl %edi, -4(%rsp)
- movl %edi, -20(%rsp)
- movss -4(%rsp), %xmm0
+ movss -4(%rsp), %xmm1
+ movaps %xmm1, %xmm0
ret
on x86-64 -m64 -O2 (I'm not sure why reload chose %xmm1 instead of %xmm0; I
guess we need a separate bug for that?) and:
convert:
pushl %ebp
movl %esp, %ebp
- subl $16, %esp
- movl 8(%ebp), %eax
- movl %eax, -4(%ebp)
- flds -4(%ebp)
- leave
+ flds 8(%ebp)
+ popl %ebp
ret
on i386 -m32 -O2 -march=pentium4.
On:
float convert (float in) { float f; __builtin_memcpy (&f, &in, sizeof (in)); return f; }
the difference is even bigger:
convert:
- movss %xmm0, -20(%rsp)
- movl -20(%rsp), %eax
- movl %eax, -4(%rsp)
- movss -4(%rsp), %xmm0
- ret
+ rep ; ret
on x86-64 -m64 -O2 and:
convert:
- subl $16, %esp
- movl 20(%esp), %eax
- movl %eax, 12(%esp)
- flds 12(%esp)
- addl $16, %esp
+ flds 4(%esp)
ret
on i386 -m32 -O2 -march=pentium4 -fomit-frame-pointer.
2006-08-30 Jakub Jelinek <jakub@redhat.com>
PR middle-end/27567
* builtins.c (fold_builtin_memcpy, fold_builtin_memmove): Remove.
(fold_builtin_mempcpy): Rename to...
(fold_builtin_memory_op): ... this. Optimize one element copy
into an assignment.
(fold_builtin_memset, fold_builtin_bzero, fold_builtin_bcopy): New
functions.
(expand_builtin_strcpy, expand_builtin_strncpy): Handle COMPOUND_EXPR.
(expand_builtin_memcpy, expand_builtin_mempcpy,
expand_builtin_memmove): Likewise. Use fold_builtin_memory_op.
(fold_builtin_1): Handle BUILT_IN_MEMSET, BUILT_IN_BZERO and
BUILT_IN_BCOPY. Use fold_builtin_memory_op for
BUILT_IN_MEM{CPY,PCPY,MOVE}.
--- gcc/builtins.c.jj 2006-08-28 13:00:22.000000000 +0200
+++ gcc/builtins.c 2006-08-30 15:16:35.000000000 +0200
@@ -160,9 +160,7 @@ static tree fold_builtin_ceil (tree, tre
static tree fold_builtin_round (tree, tree);
static tree fold_builtin_int_roundingfn (tree, tree);
static tree fold_builtin_bitop (tree, tree);
-static tree fold_builtin_memcpy (tree, tree);
-static tree fold_builtin_mempcpy (tree, tree, int);
-static tree fold_builtin_memmove (tree, tree);
+static tree fold_builtin_memory_op (tree, tree, bool, int);
static tree fold_builtin_strchr (tree, tree);
static tree fold_builtin_memcmp (tree);
static tree fold_builtin_strcmp (tree);
@@ -2857,10 +2855,19 @@ expand_builtin_memcpy (tree exp, rtx tar
unsigned int dest_align
= get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
rtx dest_mem, src_mem, dest_addr, len_rtx;
- tree result = fold_builtin_memcpy (fndecl, arglist);
+ tree result = fold_builtin_memory_op (arglist, TREE_TYPE (TREE_TYPE (fndecl)),
+ false, /*endp=*/0);
if (result)
- return expand_expr (result, target, mode, EXPAND_NORMAL);
+ {
+ while (TREE_CODE (result) == COMPOUND_EXPR)
+ {
+ expand_expr (TREE_OPERAND (result, 0), const0_rtx, VOIDmode,
+ EXPAND_NORMAL);
+ result = TREE_OPERAND (result, 1);
+ }
+ return expand_expr (result, target, mode, EXPAND_NORMAL);
+ }
/* If DEST is not a pointer type, call the normal function. */
if (dest_align == 0)
@@ -2946,10 +2953,18 @@ expand_builtin_mempcpy (tree arglist, tr
unsigned int dest_align
= get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
rtx dest_mem, src_mem, len_rtx;
- tree result = fold_builtin_mempcpy (arglist, type, endp);
+ tree result = fold_builtin_memory_op (arglist, type, false, endp);
if (result)
- return expand_expr (result, target, mode, EXPAND_NORMAL);
+ {
+ while (TREE_CODE (result) == COMPOUND_EXPR)
+ {
+ expand_expr (TREE_OPERAND (result, 0), const0_rtx, VOIDmode,
+ EXPAND_NORMAL);
+ result = TREE_OPERAND (result, 1);
+ }
+ return expand_expr (result, target, mode, EXPAND_NORMAL);
+ }
/* If either SRC or DEST is not a pointer type, don't do this
operation in-line. */
@@ -3020,10 +3035,18 @@ expand_builtin_memmove (tree arglist, tr
unsigned int src_align = get_pointer_alignment (src, BIGGEST_ALIGNMENT);
unsigned int dest_align
= get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
- tree result = fold_builtin_memmove (arglist, type);
+ tree result = fold_builtin_memory_op (arglist, type, false, /*endp=*/3);
if (result)
- return expand_expr (result, target, mode, EXPAND_NORMAL);
+ {
+ while (TREE_CODE (result) == COMPOUND_EXPR)
+ {
+ expand_expr (TREE_OPERAND (result, 0), const0_rtx, VOIDmode,
+ EXPAND_NORMAL);
+ result = TREE_OPERAND (result, 1);
+ }
+ return expand_expr (result, target, mode, EXPAND_NORMAL);
+ }
/* If DEST is not a pointer type, call the normal function. */
if (dest_align == 0)
@@ -3171,7 +3194,15 @@ expand_builtin_strcpy (tree fndecl, tree
{
tree result = fold_builtin_strcpy (fndecl, arglist, 0);
if (result)
- return expand_expr (result, target, mode, EXPAND_NORMAL);
+ {
+ while (TREE_CODE (result) == COMPOUND_EXPR)
+ {
+ expand_expr (TREE_OPERAND (result, 0), const0_rtx, VOIDmode,
+ EXPAND_NORMAL);
+ result = TREE_OPERAND (result, 1);
+ }
+ return expand_expr (result, target, mode, EXPAND_NORMAL);
+ }
return expand_movstr (TREE_VALUE (arglist),
TREE_VALUE (TREE_CHAIN (arglist)),
@@ -3297,7 +3328,15 @@ expand_builtin_strncpy (tree exp, rtx ta
tree result = fold_builtin_strncpy (fndecl, arglist, slen);
if (result)
- return expand_expr (result, target, mode, EXPAND_NORMAL);
+ {
+ while (TREE_CODE (result) == COMPOUND_EXPR)
+ {
+ expand_expr (TREE_OPERAND (result, 0), const0_rtx, VOIDmode,
+ EXPAND_NORMAL);
+ result = TREE_OPERAND (result, 1);
+ }
+ return expand_expr (result, target, mode, EXPAND_NORMAL);
+ }
/* We must be passed a constant len and src parameter. */
if (!host_integerp (len, 1) || !slen || !host_integerp (slen, 1))
@@ -7818,78 +7857,119 @@ fold_builtin_exponent (tree fndecl, tree
return 0;
}
-/* Fold function call to builtin memcpy. Return
+/* Fold function call to builtin memset. Return
NULL_TREE if no simplification can be made. */
static tree
-fold_builtin_memcpy (tree fndecl, tree arglist)
+fold_builtin_memset (tree arglist, tree type, bool ignore)
{
- tree dest, src, len;
+ tree dest, c, len, var, ret;
+ unsigned HOST_WIDE_INT length, cval;
if (!validate_arglist (arglist,
- POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
+ POINTER_TYPE, INTEGER_TYPE, INTEGER_TYPE, VOID_TYPE))
return 0;
dest = TREE_VALUE (arglist);
- src = TREE_VALUE (TREE_CHAIN (arglist));
+ c = TREE_VALUE (TREE_CHAIN (arglist));
len = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ if (! host_integerp (len, 1))
+ return 0;
+
/* If the LEN parameter is zero, return DEST. */
if (integer_zerop (len))
- return omit_one_operand (TREE_TYPE (TREE_TYPE (fndecl)), dest, src);
+ return omit_one_operand (type, dest, c);
- /* If SRC and DEST are the same (and not volatile), return DEST. */
- if (operand_equal_p (src, dest, 0))
- return omit_one_operand (TREE_TYPE (TREE_TYPE (fndecl)), dest, len);
+ if (! host_integerp (c, 1) || TREE_SIDE_EFFECTS (dest))
+ return 0;
- return 0;
+ var = dest;
+ STRIP_NOPS (var);
+ if (TREE_CODE (var) != ADDR_EXPR)
+ return 0;
+
+ var = TREE_OPERAND (var, 0);
+ if (TREE_THIS_VOLATILE (var))
+ return 0;
+
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (var))
+ && !POINTER_TYPE_P (TREE_TYPE (var)))
+ return 0;
+
+ length = tree_low_cst (len, 1);
+ if (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (var))) != length)
+ return 0;
+
+ if (length > HOST_BITS_PER_WIDE_INT / BITS_PER_UNIT)
+ return 0;
+
+ if (integer_zerop (c))
+ cval = 0;
+ else
+ {
+ if (CHAR_BIT != 8 || BITS_PER_UNIT != 8 || HOST_BITS_PER_WIDE_INT > 64)
+ return 0;
+
+ cval = tree_low_cst (c, 1);
+ cval &= 0xff;
+ cval |= cval << 8;
+ cval |= cval << 16;
+ cval |= (cval << 31) << 1;
+ }
+
+ ret = build_int_cst_type (TREE_TYPE (var), cval);
+ ret = build2 (MODIFY_EXPR, TREE_TYPE (var), var, ret);
+ if (ignore)
+ return ret;
+
+ return omit_one_operand (type, dest, ret);
}
-/* Fold function call to builtin mempcpy. Return
+/* Fold function call to builtin bzero. Return
NULL_TREE if no simplification can be made. */
static tree
-fold_builtin_mempcpy (tree arglist, tree type, int endp)
+fold_builtin_bzero (tree arglist, bool ignore)
{
- if (validate_arglist (arglist,
- POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
- {
- tree dest = TREE_VALUE (arglist);
- tree src = TREE_VALUE (TREE_CHAIN (arglist));
- tree len = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ tree dest, size, newarglist;
- /* If the LEN parameter is zero, return DEST. */
- if (integer_zerop (len))
- return omit_one_operand (type, dest, src);
+ if (!validate_arglist (arglist, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
+ return 0;
- /* If SRC and DEST are the same (and not volatile), return DEST+LEN. */
- if (operand_equal_p (src, dest, 0))
- {
- if (endp == 0)
- return omit_one_operand (type, dest, len);
+ if (!ignore)
+ return 0;
- if (endp == 2)
- len = fold_build2 (MINUS_EXPR, TREE_TYPE (len), len,
- ssize_int (1));
+ dest = TREE_VALUE (arglist);
+ size = TREE_VALUE (TREE_CHAIN (arglist));
- len = fold_convert (TREE_TYPE (dest), len);
- len = fold_build2 (PLUS_EXPR, TREE_TYPE (dest), dest, len);
- return fold_convert (type, len);
- }
- }
- return 0;
+ /* New argument list transforming bzero(ptr x, int y) to
+ memset(ptr x, int 0, size_t y). This is done this way
+ so that if it isn't expanded inline, we fall back to
+ calling bzero instead of memset. */
+
+ newarglist = build_tree_list (NULL_TREE, fold_convert (sizetype, size));
+ newarglist = tree_cons (NULL_TREE, integer_zero_node, newarglist);
+ newarglist = tree_cons (NULL_TREE, dest, newarglist);
+ return fold_builtin_memset (newarglist, void_type_node, ignore);
}
-/* Fold function call to builtin memmove. Return
- NULL_TREE if no simplification can be made. */
+/* Fold function call to builtin mem{{,p}cpy,move}. Return
+ NULL_TREE if no simplification can be made.
+ If ENDP is 0, return DEST (like memcpy).
+ If ENDP is 1, return DEST+LEN (like mempcpy).
+ If ENDP is 2, return DEST+LEN-1 (like stpcpy).
+ If ENDP is 3, return DEST, additionally *SRC and *DEST may overlap
+ (memmove). */
static tree
-fold_builtin_memmove (tree arglist, tree type)
+fold_builtin_memory_op (tree arglist, tree type, bool ignore, int endp)
{
- tree dest, src, len;
+ tree dest, src, len, destvar, srcvar, expr;
+ unsigned HOST_WIDE_INT length;
- if (!validate_arglist (arglist,
- POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
+ if (! validate_arglist (arglist,
+ POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
return 0;
dest = TREE_VALUE (arglist);
@@ -7900,11 +7980,111 @@ fold_builtin_memmove (tree arglist, tree
if (integer_zerop (len))
return omit_one_operand (type, dest, src);
- /* If SRC and DEST are the same (and not volatile), return DEST. */
+ /* If SRC and DEST are the same (and not volatile), return
+ DEST{,+LEN,+LEN-1}. */
if (operand_equal_p (src, dest, 0))
- return omit_one_operand (type, dest, len);
+ expr = len;
+ else
+ {
+ if (! host_integerp (len, 1))
+ return 0;
- return 0;
+ if (TREE_SIDE_EFFECTS (dest) || TREE_SIDE_EFFECTS (src))
+ return 0;
+
+ destvar = dest;
+ STRIP_NOPS (destvar);
+ if (TREE_CODE (destvar) != ADDR_EXPR)
+ return 0;
+
+ destvar = TREE_OPERAND (destvar, 0);
+ if (TREE_THIS_VOLATILE (destvar))
+ return 0;
+
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (destvar))
+ && !POINTER_TYPE_P (TREE_TYPE (destvar))
+ && !SCALAR_FLOAT_TYPE_P (TREE_TYPE (destvar)))
+ return 0;
+
+ srcvar = src;
+ STRIP_NOPS (srcvar);
+ if (TREE_CODE (srcvar) != ADDR_EXPR)
+ return 0;
+
+ srcvar = TREE_OPERAND (srcvar, 0);
+ if (TREE_THIS_VOLATILE (srcvar))
+ return 0;
+
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (srcvar))
+ && !POINTER_TYPE_P (TREE_TYPE (srcvar))
+ && !SCALAR_FLOAT_TYPE_P (TREE_TYPE (srcvar)))
+ return 0;
+
+ length = tree_low_cst (len, 1);
+ if (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (destvar))) != length
+ || GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (srcvar))) != length)
+ return 0;
+
+ if ((INTEGRAL_TYPE_P (TREE_TYPE (srcvar))
+ || POINTER_TYPE_P (TREE_TYPE (srcvar)))
+ && (INTEGRAL_TYPE_P (TREE_TYPE (destvar))
+ || POINTER_TYPE_P (TREE_TYPE (destvar))))
+ expr = fold_convert (TREE_TYPE (destvar), srcvar);
+ else
+ expr = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (destvar), srcvar);
+ expr = build2 (MODIFY_EXPR, TREE_TYPE (destvar), destvar, expr);
+ }
+
+ if (ignore)
+ return expr;
+
+ if (endp == 0 || endp == 3)
+ return omit_one_operand (type, dest, expr);
+
+ if (expr == len)
+ expr = 0;
+
+ if (endp == 2)
+ len = fold_build2 (MINUS_EXPR, TREE_TYPE (len), len,
+ ssize_int (1));
+
+ len = fold_convert (TREE_TYPE (dest), len);
+ dest = fold_build2 (PLUS_EXPR, TREE_TYPE (dest), dest, len);
+ dest = fold_convert (type, dest);
+ if (expr)
+ dest = omit_one_operand (type, dest, expr);
+ return dest;
+}
+
+/* Fold function call to builtin bcopy. Return NULL_TREE if no
+ simplification can be made. */
+
+static tree
+fold_builtin_bcopy (tree arglist, bool ignore)
+{
+ tree src, dest, size, newarglist;
+
+ if (!validate_arglist (arglist,
+ POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
+ return 0;
+
+ if (! ignore)
+ return 0;
+
+ src = TREE_VALUE (arglist);
+ dest = TREE_VALUE (TREE_CHAIN (arglist));
+ size = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+
+ /* New argument list transforming bcopy(ptr x, ptr y, int z) to
+ memmove(ptr y, ptr x, size_t z). This is done this way
+ so that if it isn't expanded inline, we fall back to
+ calling bcopy instead of memmove. */
+
+ newarglist = build_tree_list (NULL_TREE, fold_convert (sizetype, size));
+ newarglist = tree_cons (NULL_TREE, src, newarglist);
+ newarglist = tree_cons (NULL_TREE, dest, newarglist);
+
+ return fold_builtin_memory_op (newarglist, void_type_node, true, /*endp=*/3);
}
/* Fold function call to builtin strcpy. If LEN is not NULL, it represents
@@ -8795,14 +8975,23 @@ fold_builtin_1 (tree fndecl, tree arglis
CASE_INT_FN (BUILT_IN_PARITY):
return fold_builtin_bitop (fndecl, arglist);
+ case BUILT_IN_MEMSET:
+ return fold_builtin_memset (arglist, type, ignore);
+
case BUILT_IN_MEMCPY:
- return fold_builtin_memcpy (fndecl, arglist);
+ return fold_builtin_memory_op (arglist, type, ignore, /*endp=*/0);
case BUILT_IN_MEMPCPY:
- return fold_builtin_mempcpy (arglist, type, /*endp=*/1);
+ return fold_builtin_memory_op (arglist, type, ignore, /*endp=*/1);
case BUILT_IN_MEMMOVE:
- return fold_builtin_memmove (arglist, type);
+ return fold_builtin_memory_op (arglist, type, ignore, /*endp=*/3);
+
+ case BUILT_IN_BZERO:
+ return fold_builtin_bzero (arglist, ignore);
+
+ case BUILT_IN_BCOPY:
+ return fold_builtin_bcopy (arglist, ignore);
CASE_FLT_FN (BUILT_IN_SIGNBIT):
return fold_builtin_signbit (fndecl, arglist);
Jakub