This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Better optimize mempcpy and stpcpy builtins
- From: Jakub Jelinek <jakub at redhat dot com>
- To: rth at redhat dot com, "Kaveh R. Ghazi" <ghazi at caip dot rutgers dot edu>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Wed, 21 May 2003 23:50:42 +0200
- Subject: [PATCH] Better optimize mempcpy and stpcpy builtins
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
The following patch implements what I talked about in
http://gcc.gnu.org/ml/gcc-patches/2003-05/msg00389.html
In particular, builtin mempcpy is never deoptimized into a memcpy call followed
by a return value adjustment and, where possible, the final "to" pointer left by
the various block move methods (store_by_pieces, move_by_pieces for now) is used
instead of computing it again from the original value. E.g. on IA-64 for
mempcpy of 17 bytes
mov r8 = to
st8 [r8] = firstvalue, 8
st8 [r8] = secondvalue, 8
st1 [r8] = thirdvalue, 1
instead of
mov rX = to
adds r8 = 17, to
st8 [rX] = firstvalue, 8
st8 [rX] = secondvalue, 8
st1 [rX] = thirdvalue
can be used.
I have not touched movstr yet; I wonder whether I should add new optabs like
movstrendpM in addition to the current movstrM for insns which would return
the end pointer (e.g. on IA-32 it should be quite easy, since destreg seems to
be kept up-to-date), or whether I should change movstrM on all arches.
2003-05-21 Jakub Jelinek <jakub@redhat.com>
* builtins.c (expand_builtin_memcpy): Pass endp to store_by_pieces,
and use its return value instead of computing result here.
Pass BLOCK_OP_NORMAL_ENDP to emit_block_move if endp, if dest_addr
is not NULL, expect len_rtx to be already added.
(expand_builtin_mempcpy): If ignoring result, only do expand_call.
(expand_builtin_stpcpy): Likewise.
(expand_builtin_strncpy, expand_builtin_memset): Adjust
store_by_pieces callers.
* expr.c (move_by_pieces): Add endp argument; return TO, or the
memory at the end or one byte before it, depending on endp.
(store_by_pieces): Likewise.
(emit_block_move): Handle BLOCK_OP_NORMAL_ENDP.
Adjust calls to move_by_pieces and emit_block_move_via_libcall.
(emit_block_move_via_libcall): Add endp argument, expand to mempcpy
if endp is non-zero.
(mempcpy_fn): New variable.
(init_block_move_fn): Add endp argument, return mempcpy decl if it is
set.
(emit_block_move_libcall_fn): Adjust caller.
(emit_mempcpy_libcall_fn): New function.
(emit_push_insn): Adjust move_by_pieces caller.
* c-decl.c (finish_decl): Call init_block_move_fn for
BUILT_IN_MEMPCPY. Pass 0 to init_block_move_fn when called
for BUILT_IN_MEMCPY or BUILT_IN_BCOPY.
* expr.h (enum block_op_methods): Add BLOCK_OP_NORMAL_ENDP.
(init_block_move_fn, store_by_pieces): Adjust prototypes.
* rtl.h (move_by_pieces): Adjust prototype.
* config/mips/mips.c (expand_block_move): Adjust move_by_pieces
caller.
* gcc.c-torture/execute/string-opt-3.c: New test.
* gcc.dg/string-opt-1.c: New test.
--- gcc/builtins.c.jj 2003-05-21 06:22:18.000000000 -0400
+++ gcc/builtins.c 2003-05-21 17:03:12.000000000 -0400
@@ -2301,28 +2301,15 @@ expand_builtin_memcpy (arglist, target,
&& can_store_by_pieces (INTVAL (len_rtx), builtin_memcpy_read_str,
(PTR) src_str, dest_align))
{
- store_by_pieces (dest_mem, INTVAL (len_rtx),
- builtin_memcpy_read_str,
- (PTR) src_str, dest_align);
+ dest_mem = store_by_pieces (dest_mem, INTVAL (len_rtx),
+ builtin_memcpy_read_str,
+ (PTR) src_str, dest_align, endp);
dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
#ifdef POINTERS_EXTEND_UNSIGNED
if (GET_MODE (dest_mem) != ptr_mode)
dest_mem = convert_memory_address (ptr_mode, dest_mem);
#endif
- if (endp)
- {
- rtx result;
- rtx delta = len_rtx;
-
- if (endp == 2)
- delta = GEN_INT (INTVAL (delta) - 1);
-
- result = simplify_gen_binary (PLUS, GET_MODE (dest_mem),
- dest_mem, delta);
- return force_operand (result, NULL_RTX);
- }
- else
- return dest_mem;
+ return dest_mem;
}
src_mem = get_memory_rtx (src);
@@ -2330,7 +2317,8 @@ expand_builtin_memcpy (arglist, target,
/* Copy word part most expediently. */
dest_addr = emit_block_move (dest_mem, src_mem, len_rtx,
- BLOCK_OP_NORMAL);
+ endp ? BLOCK_OP_NORMAL_ENDP
+ : BLOCK_OP_NORMAL);
if (dest_addr == 0)
{
@@ -2339,25 +2327,28 @@ expand_builtin_memcpy (arglist, target,
if (GET_MODE (dest_addr) != ptr_mode)
dest_addr = convert_memory_address (ptr_mode, dest_addr);
#endif
- }
+ if (endp)
+ {
+ rtx result = force_operand (len_rtx, NULL_RTX);
- if (endp)
- {
- rtx result = force_operand (len_rtx, NULL_RTX);
+ if (endp == 2)
+ {
+ enum machine_mode mode = GET_MODE (result);
- if (endp == 2)
- {
- result = simplify_gen_binary (MINUS, GET_MODE (result),
- result, const1_rtx);
- result = force_operand (result, NULL_RTX);
+ if (mode == VOIDmode)
+ mode = GET_MODE (dest_addr);
+ result = simplify_gen_binary (PLUS, mode, result,
+ constm1_rtx);
+ }
+ result = simplify_gen_binary (PLUS, GET_MODE (dest_addr),
+ dest_addr, result);
+ dest_addr = force_operand (result, NULL_RTX);
}
-
- result = simplify_gen_binary (PLUS, GET_MODE (dest_addr),
- dest_addr, result);
- return force_operand (result, NULL_RTX);
}
- else
- return dest_addr;
+ else if (endp == 2)
+ dest_addr = simplify_gen_binary (PLUS, GET_MODE (dest_addr),
+ dest_addr, constm1_rtx);
+ return dest_addr;
}
}
@@ -2380,13 +2371,8 @@ expand_builtin_mempcpy (arglist, target,
/* If return value is ignored, transform mempcpy into memcpy. */
if (target == const0_rtx)
{
- tree fn;
- rtx ret = expand_builtin_memcpy (arglist, target, mode, /*endp=*/0);
-
- if (ret)
- return ret;
+ tree fn = implicit_built_in_decls[BUILT_IN_MEMCPY];
- fn = implicit_built_in_decls[BUILT_IN_MEMCPY];
if (!fn)
return 0;
@@ -2533,13 +2519,7 @@ expand_builtin_stpcpy (arglist, target,
/* If return value is ignored, transform stpcpy into strcpy. */
if (target == const0_rtx)
{
- tree fn;
- rtx ret = expand_builtin_strcpy (arglist, target, mode);
-
- if (ret)
- return ret;
-
- fn = implicit_built_in_decls[BUILT_IN_STRCPY];
+ tree fn = implicit_built_in_decls[BUILT_IN_STRCPY];
if (!fn)
return 0;
@@ -2547,7 +2527,7 @@ expand_builtin_stpcpy (arglist, target,
target, mode, EXPAND_NORMAL);
}
- /* Ensure we get an actual string who length can be evaluated at
+ /* Ensure we get an actual string whose length can be evaluated at
compile-time, not an expression containing a string. This is
because the latter will potentially produce pessimized code
when used to produce the return value. */
@@ -2640,7 +2620,7 @@ expand_builtin_strncpy (arglist, target,
dest_mem = get_memory_rtx (dest);
store_by_pieces (dest_mem, tree_low_cst (len, 1),
builtin_strncpy_read_str,
- (PTR) p, dest_align);
+ (PTR) p, dest_align, 0);
dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
#ifdef POINTERS_EXTEND_UNSIGNED
if (GET_MODE (dest_mem) != ptr_mode)
@@ -2768,7 +2748,7 @@ expand_builtin_memset (arglist, target,
dest_mem = get_memory_rtx (dest);
store_by_pieces (dest_mem, tree_low_cst (len, 1),
builtin_memset_gen_str,
- (PTR) val_rtx, dest_align);
+ (PTR) val_rtx, dest_align, 0);
dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
#ifdef POINTERS_EXTEND_UNSIGNED
if (GET_MODE (dest_mem) != ptr_mode)
@@ -2792,7 +2772,7 @@ expand_builtin_memset (arglist, target,
dest_mem = get_memory_rtx (dest);
store_by_pieces (dest_mem, tree_low_cst (len, 1),
builtin_memset_read_str,
- (PTR) &c, dest_align);
+ (PTR) &c, dest_align, 0);
dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
#ifdef POINTERS_EXTEND_UNSIGNED
if (GET_MODE (dest_mem) != ptr_mode)
--- gcc/expr.h.jj 2003-05-16 05:54:48.000000000 -0400
+++ gcc/expr.h 2003-05-21 11:58:26.000000000 -0400
@@ -399,11 +399,12 @@ extern rtx convert_modes PARAMS ((enum m
enum block_op_methods
{
BLOCK_OP_NORMAL,
+ BLOCK_OP_NORMAL_ENDP,
BLOCK_OP_NO_LIBCALL,
BLOCK_OP_CALL_PARM
};
-extern void init_block_move_fn PARAMS ((const char *));
+extern void init_block_move_fn PARAMS ((const char *, int));
extern void init_block_clear_fn PARAMS ((const char *));
extern rtx emit_block_move PARAMS ((rtx, rtx, rtx, enum block_op_methods));
@@ -463,11 +464,12 @@ extern int can_store_by_pieces PARAMS ((
/* Generate several move instructions to store LEN bytes generated by
CONSTFUN to block TO. (A MEM rtx with BLKmode). CONSTFUNDATA is a
pointer which will be passed as argument in every CONSTFUN call.
- ALIGN is maximum alignment we can assume. */
-extern void store_by_pieces PARAMS ((rtx, unsigned HOST_WIDE_INT,
- rtx (*) (PTR, HOST_WIDE_INT,
- enum machine_mode),
- PTR, unsigned int));
+ ALIGN is maximum alignment we can assume. The final int argument is
+ ENDP: 0 returns TO, 1 memory at the end, 2 the end minus one byte. */
+extern rtx store_by_pieces PARAMS ((rtx, unsigned HOST_WIDE_INT,
+ rtx (*) (PTR, HOST_WIDE_INT,
+ enum machine_mode),
+ PTR, unsigned int, int));
/* Emit insns to set X from Y. */
extern rtx emit_move_insn PARAMS ((rtx, rtx));
--- gcc/expr.c.jj 2003-05-16 05:54:48.000000000 -0400
+++ gcc/expr.c 2003-05-21 13:18:41.000000000 -0400
@@ -138,8 +138,9 @@ static void move_by_pieces_1 PARAMS ((rt
struct move_by_pieces *));
static bool block_move_libcall_safe_for_call_parm PARAMS ((void));
static bool emit_block_move_via_movstr PARAMS ((rtx, rtx, rtx, unsigned));
-static rtx emit_block_move_via_libcall PARAMS ((rtx, rtx, rtx));
+static rtx emit_block_move_via_libcall PARAMS ((rtx, rtx, rtx, int));
static tree emit_block_move_libcall_fn PARAMS ((int));
+static tree emit_mempcpy_libcall_fn PARAMS ((void));
static void emit_block_move_via_loop PARAMS ((rtx, rtx, rtx, unsigned));
static rtx clear_by_pieces_1 PARAMS ((PTR, HOST_WIDE_INT,
enum machine_mode));
@@ -1463,13 +1464,18 @@ convert_modes (mode, oldmode, x, unsigne
If PUSH_ROUNDING is defined and TO is NULL, emit_single_push_insn is
used to push FROM to the stack.
- ALIGN is maximum stack alignment we can assume. */
+ ALIGN is maximum stack alignment we can assume.
-void
-move_by_pieces (to, from, len, align)
+ If ENDP is 0 return to, if ENDP is 1 return memory at the end ala
+ mempcpy, and if ENDP is 2 return memory at the end minus one byte
+ ala stpcpy. */
+
+rtx
+move_by_pieces (to, from, len, align, endp)
rtx to, from;
unsigned HOST_WIDE_INT len;
unsigned int align;
+ int endp;
{
struct move_by_pieces data;
rtx to_addr, from_addr = XEXP (from, 0);
@@ -1583,6 +1589,36 @@ move_by_pieces (to, from, len, align)
/* The code above should have handled everything. */
if (data.len > 0)
abort ();
+
+ if (endp)
+ {
+ rtx to1;
+
+ if (data.reverse)
+ abort ();
+ if (data.autinc_to)
+ {
+ if (endp == 2)
+ {
+ if (HAVE_POST_INCREMENT && data.explicit_inc_to > 0)
+ emit_insn (gen_add2_insn (data.to_addr, constm1_rtx));
+ else
+ data.to_addr = copy_addr_to_reg (plus_constant (data.to_addr,
+ -1));
+ }
+ to1 = adjust_automodify_address (data.to, QImode, data.to_addr,
+ data.offset);
+ }
+ else
+ {
+ if (endp == 2)
+ --data.offset;
+ to1 = adjust_address (data.to, QImode, data.offset);
+ }
+ return to1;
+ }
+ else
+ return data.to;
}
/* Return number of insns required to move L bytes by pieces.
@@ -1707,6 +1743,7 @@ emit_block_move (x, y, size, method)
{
bool may_use_call;
rtx retval = 0;
+ int endp = 0;
unsigned int align;
switch (method)
@@ -1715,6 +1752,11 @@ emit_block_move (x, y, size, method)
may_use_call = true;
break;
+ case BLOCK_OP_NORMAL_ENDP:
+ endp = 1;
+ may_use_call = true;
+ break;
+
case BLOCK_OP_CALL_PARM:
may_use_call = block_move_libcall_safe_for_call_parm ();
@@ -1760,11 +1802,18 @@ emit_block_move (x, y, size, method)
}
if (GET_CODE (size) == CONST_INT && MOVE_BY_PIECES_P (INTVAL (size), align))
- move_by_pieces (x, y, INTVAL (size), align);
- else if (emit_block_move_via_movstr (x, y, size, align))
+ {
+ rtx dest_mem = move_by_pieces (x, y, INTVAL (size), align, endp);
+
+ if (endp)
+ retval = force_operand (XEXP (dest_mem, 0), NULL_RTX);
+ }
+ else if (!endp && emit_block_move_via_movstr (x, y, size, align))
;
else if (may_use_call)
- retval = emit_block_move_via_libcall (x, y, size);
+ retval = emit_block_move_via_libcall (x, y, size, endp);
+ else if (endp)
+ abort ();
else
emit_block_move_via_loop (x, y, size, align);
@@ -1907,8 +1956,9 @@ emit_block_move_via_movstr (x, y, size,
Return the return value from memcpy, 0 otherwise. */
static rtx
-emit_block_move_via_libcall (dst, src, size)
+emit_block_move_via_libcall (dst, src, size, endp)
rtx dst, src, size;
+ int endp;
{
rtx dst_addr, src_addr;
tree call_expr, arg_list, fn, src_tree, dst_tree, size_tree;
@@ -1968,9 +2018,12 @@ emit_block_move_via_libcall (dst, src, s
else
size_tree = make_tree (unsigned_type_node, size);
- fn = emit_block_move_libcall_fn (true);
+ if (endp)
+ fn = emit_mempcpy_libcall_fn ();
+ else
+ fn = emit_block_move_libcall_fn (true);
arg_list = tree_cons (NULL_TREE, size_tree, NULL_TREE);
- if (TARGET_MEM_FUNCTIONS)
+ if (endp || TARGET_MEM_FUNCTIONS)
{
arg_list = tree_cons (NULL_TREE, src_tree, arg_list);
arg_list = tree_cons (NULL_TREE, dst_tree, arg_list);
@@ -1999,7 +2052,10 @@ emit_block_move_via_libcall (dst, src, s
gen_rtx_CLOBBER (VOIDmode, dst),
NULL_RTX));
- return TARGET_MEM_FUNCTIONS ? retval : NULL_RTX;
+ if (endp == 2)
+ retval = simplify_gen_binary (PLUS, GET_MODE (retval), retval,
+ constm1_rtx);
+ return (endp || TARGET_MEM_FUNCTIONS) ? retval : NULL_RTX;
}
/* A subroutine of emit_block_move_via_libcall. Create the tree node
@@ -2007,16 +2063,28 @@ emit_block_move_via_libcall (dst, src, s
is true, we call assemble_external. */
static GTY(()) tree block_move_fn;
+static GTY(()) tree mempcpy_fn;
void
-init_block_move_fn (asmspec)
+init_block_move_fn (asmspec, endp)
const char *asmspec;
+ int endp;
{
- if (!block_move_fn)
+ tree fn;
+
+ fn = endp ? mempcpy_fn : block_move_fn;
+ if (!fn)
{
- tree fn, args;
+ tree args;
- if (TARGET_MEM_FUNCTIONS)
+ if (endp)
+ {
+ fn = get_identifier ("mempcpy");
+ args = build_function_type_list (ptr_type_node, ptr_type_node,
+ const_ptr_type_node, sizetype,
+ NULL_TREE);
+ }
+ else if (TARGET_MEM_FUNCTIONS)
{
fn = get_identifier ("memcpy");
args = build_function_type_list (ptr_type_node, ptr_type_node,
@@ -2037,13 +2105,16 @@ init_block_move_fn (asmspec)
DECL_ARTIFICIAL (fn) = 1;
TREE_NOTHROW (fn) = 1;
- block_move_fn = fn;
+ if (endp)
+ mempcpy_fn = fn;
+ else
+ block_move_fn = fn;
}
if (asmspec)
{
- SET_DECL_RTL (block_move_fn, NULL_RTX);
- SET_DECL_ASSEMBLER_NAME (block_move_fn, get_identifier (asmspec));
+ SET_DECL_RTL (fn, NULL_RTX);
+ SET_DECL_ASSEMBLER_NAME (fn, get_identifier (asmspec));
}
}
@@ -2054,7 +2125,7 @@ emit_block_move_libcall_fn (for_call)
static bool emitted_extern;
if (!block_move_fn)
- init_block_move_fn (NULL);
+ init_block_move_fn (NULL, 0);
if (for_call && !emitted_extern)
{
@@ -2066,6 +2137,24 @@ emit_block_move_libcall_fn (for_call)
return block_move_fn;
}
+static tree
+emit_mempcpy_libcall_fn ()
+{
+ static bool emitted_extern;
+
+ if (!mempcpy_fn)
+ init_block_move_fn (NULL, 1);
+
+ if (!emitted_extern)
+ {
+ emitted_extern = true;
+ make_decl_rtl (mempcpy_fn, NULL);
+ assemble_external (mempcpy_fn);
+ }
+
+ return mempcpy_fn;
+}
+
/* A subroutine of emit_block_move. Copy the data via an explicit
loop. This is used only when libcalls are forbidden. */
/* ??? It'd be nice to copy in hunks larger than QImode. */
@@ -2738,15 +2827,19 @@ can_store_by_pieces (len, constfun, cons
/* Generate several move instructions to store LEN bytes generated by
CONSTFUN to block TO. (A MEM rtx with BLKmode). CONSTFUNDATA is a
pointer which will be passed as argument in every CONSTFUN call.
- ALIGN is maximum alignment we can assume. */
+ ALIGN is maximum alignment we can assume.
+ If ENDP is 0 return to, if ENDP is 1 return memory at the end ala
+ mempcpy, and if ENDP is 2 return memory at the end minus one byte
+ ala stpcpy. */
-void
-store_by_pieces (to, len, constfun, constfundata, align)
+rtx
+store_by_pieces (to, len, constfun, constfundata, align, endp)
rtx to;
unsigned HOST_WIDE_INT len;
rtx (*constfun) PARAMS ((PTR, HOST_WIDE_INT, enum machine_mode));
PTR constfundata;
unsigned int align;
+ int endp;
{
struct store_by_pieces data;
@@ -2758,6 +2851,35 @@ store_by_pieces (to, len, constfun, cons
data.len = len;
data.to = to;
store_by_pieces_1 (&data, align);
+ if (endp)
+ {
+ rtx to1;
+
+ if (data.reverse)
+ abort ();
+ if (data.autinc_to)
+ {
+ if (endp == 2)
+ {
+ if (HAVE_POST_INCREMENT && data.explicit_inc_to > 0)
+ emit_insn (gen_add2_insn (data.to_addr, constm1_rtx));
+ else
+ data.to_addr = copy_addr_to_reg (plus_constant (data.to_addr,
+ -1));
+ }
+ to1 = adjust_automodify_address (data.to, QImode, data.to_addr,
+ data.offset);
+ }
+ else
+ {
+ if (endp == 2)
+ --data.offset;
+ to1 = adjust_address (data.to, QImode, data.offset);
+ }
+ return to1;
+ }
+ else
+ return data.to;
}
/* Generate several move instructions to clear LEN bytes of block TO. (A MEM
@@ -3869,7 +3991,7 @@ emit_push_insn (x, mode, type, size, ali
&& where_pad != none && where_pad != stack_direction)
anti_adjust_stack (GEN_INT (extra));
- move_by_pieces (NULL, xinner, INTVAL (size) - used, align);
+ move_by_pieces (NULL, xinner, INTVAL (size) - used, align, 0);
}
else
#endif /* PUSH_ROUNDING */
--- gcc/c-decl.c.jj 2003-05-21 06:24:32.000000000 -0400
+++ gcc/c-decl.c 2003-05-21 11:59:20.000000000 -0400
@@ -2885,14 +2885,16 @@ finish_decl (decl, init, asmspec_tree)
tree builtin = built_in_decls [DECL_FUNCTION_CODE (decl)];
SET_DECL_RTL (builtin, NULL_RTX);
SET_DECL_ASSEMBLER_NAME (builtin, get_identifier (starred));
+ if (DECL_FUNCTION_CODE (decl) == BUILT_IN_MEMPCPY)
+ init_block_move_fn (starred, 1);
#ifdef TARGET_MEM_FUNCTIONS
if (DECL_FUNCTION_CODE (decl) == BUILT_IN_MEMCPY)
- init_block_move_fn (starred);
+ init_block_move_fn (starred, 0);
else if (DECL_FUNCTION_CODE (decl) == BUILT_IN_MEMSET)
init_block_clear_fn (starred);
#else
if (DECL_FUNCTION_CODE (decl) == BUILT_IN_BCOPY)
- init_block_move_fn (starred);
+ init_block_move_fn (starred, 0);
else if (DECL_FUNCTION_CODE (decl) == BUILT_IN_BZERO)
init_block_clear_fn (starred);
#endif
--- gcc/rtl.h.jj 2003-05-16 05:55:04.000000000 -0400
+++ gcc/rtl.h 2003-05-21 12:37:11.000000000 -0400
@@ -2141,9 +2141,9 @@ extern void emit_jump PARAMS ((rtx));
extern int preserve_subexpressions_p PARAMS ((void));
/* In expr.c */
-extern void move_by_pieces PARAMS ((rtx, rtx,
+extern rtx move_by_pieces PARAMS ((rtx, rtx,
unsigned HOST_WIDE_INT,
- unsigned int));
+ unsigned int, int));
/* In flow.c */
extern void recompute_reg_usage PARAMS ((rtx, int));
--- gcc/config/mips/mips.c.jj 2003-05-21 06:24:40.000000000 -0400
+++ gcc/config/mips/mips.c 2003-05-21 12:27:08.000000000 -0400
@@ -3820,7 +3820,7 @@ expand_block_move (operands)
else if (constp && bytes <= (unsigned)2 * MAX_MOVE_BYTES
&& align == (unsigned) UNITS_PER_WORD)
- move_by_pieces (orig_dest, orig_src, bytes, align * BITS_PER_WORD);
+ move_by_pieces (orig_dest, orig_src, bytes, align * BITS_PER_WORD, 0);
else if (constp && bytes <= (unsigned)2 * MAX_MOVE_BYTES)
emit_insn (gen_movstrsi_internal (replace_equiv_address (orig_dest,
--- gcc/testsuite/gcc.c-torture/execute/string-opt-3.c.jj 2003-05-21 13:56:22.000000000 -0400
+++ gcc/testsuite/gcc.c-torture/execute/string-opt-3.c 2003-05-21 17:10:22.000000000 -0400
@@ -0,0 +1,166 @@
+/* Copyright (C) 2003 Free Software Foundation.
+
+ Ensure that builtin mempcpy and stpcpy perform correctly.
+
+ Written by Jakub Jelinek, 21/05/2003. */
+
+extern void abort (void);
+typedef __SIZE_TYPE__ size_t;
+extern void *mempcpy (void *, const void *, size_t);
+extern int memcmp (const void *, const void *, size_t);
+extern char *stpcpy (char *, const char *);
+
+long buf1[64];
+char *buf2 = (char *) (buf1 + 32);
+long buf5[20];
+char buf7[20];
+
+int
+__attribute__((noinline))
+test (long *buf3, char *buf4, char *buf6, int n)
+{
+ int i = 0;
+
+ /* These should probably be handled by store_by_pieces on most arches. */
+ if (mempcpy (buf1, "ABCDEFGHI", 9) != (char *) buf1 + 9
+ || memcmp (buf1, "ABCDEFGHI\0", 11))
+ abort ();
+
+ if (mempcpy (buf1, "abcdefghijklmnopq", 17) != (char *) buf1 + 17
+ || memcmp (buf1, "abcdefghijklmnopq\0", 19))
+ abort ();
+
+ if (__builtin_mempcpy (buf3, "ABCDEF", 6) != (char *) buf1 + 6
+ || memcmp (buf1, "ABCDEFghijklmnopq\0", 19))
+ abort ();
+
+ if (__builtin_mempcpy (buf3, "a", 1) != (char *) buf1 + 1
+ || memcmp (buf1, "aBCDEFghijklmnopq\0", 19))
+ abort ();
+
+ if (mempcpy ((char *) buf3 + 2, "bcd" + ++i, 2) != (char *) buf1 + 4
+ || memcmp (buf1, "aBcdEFghijklmnopq\0", 19)
+ || i != 1)
+ abort ();
+
+ /* These should probably be handled by move_by_pieces on most arches. */
+ if (mempcpy ((char *) buf3 + 4, buf5, 6) != (char *) buf1 + 10
+ || memcmp (buf1, "aBcdRSTUVWklmnopq\0", 19))
+ abort ();
+
+ if (__builtin_mempcpy ((char *) buf1 + ++i + 8, (char *) buf5 + 1, 1)
+ != (char *) buf1 + 11
+ || memcmp (buf1, "aBcdRSTUVWSlmnopq\0", 19)
+ || i != 2)
+ abort ();
+
+ if (mempcpy ((char *) buf3 + 14, buf6, 2) != (char *) buf1 + 16
+ || memcmp (buf1, "aBcdRSTUVWSlmnrsq\0", 19))
+ abort ();
+
+ if (mempcpy (buf3, buf5, 8) != (char *) buf1 + 8
+ || memcmp (buf1, "RSTUVWXYVWSlmnrsq\0", 19))
+ abort ();
+
+ if (mempcpy (buf3, buf5, 17) != (char *) buf1 + 17
+ || memcmp (buf1, "RSTUVWXYZ01234567\0", 19))
+ abort ();
+
+ __builtin_memcpy (buf3, "aBcdEFghijklmnopq\0", 19);
+
+ /* These should be handled either by movstrendM or mempcpy
+ call. */
+ if (mempcpy ((char *) buf3 + 4, buf5, n + 6) != (char *) buf1 + 10
+ || memcmp (buf1, "aBcdRSTUVWklmnopq\0", 19))
+ abort ();
+
+ if (__builtin_mempcpy ((char *) buf1 + ++i + 8, (char *) buf5 + 1, n + 1)
+ != (char *) buf1 + 12
+ || memcmp (buf1, "aBcdRSTUVWkSmnopq\0", 19)
+ || i != 3)
+ abort ();
+
+ if (mempcpy ((char *) buf3 + 14, buf6, n + 2) != (char *) buf1 + 16
+ || memcmp (buf1, "aBcdRSTUVWkSmnrsq\0", 19))
+ abort ();
+
+ i = 0;
+
+ /* These might be handled by store_by_pieces. */
+ if (mempcpy (buf2, "ABCDEFGHI", 9) != buf2 + 9
+ || memcmp (buf2, "ABCDEFGHI\0", 11))
+ abort ();
+
+ if (mempcpy (buf2, "abcdefghijklmnopq", 17) != buf2 + 17
+ || memcmp (buf2, "abcdefghijklmnopq\0", 19))
+ abort ();
+
+ if (__builtin_mempcpy (buf4, "ABCDEF", 6) != buf2 + 6
+ || memcmp (buf2, "ABCDEFghijklmnopq\0", 19))
+ abort ();
+
+ if (__builtin_mempcpy (buf4, "a", 1) != buf2 + 1
+ || memcmp (buf2, "aBCDEFghijklmnopq\0", 19))
+ abort ();
+
+ if (mempcpy (buf4 + 2, "bcd" + ++i, 2) != buf2 + 4
+ || memcmp (buf2, "aBcdEFghijklmnopq\0", 19)
+ || i != 1)
+ abort ();
+
+ /* These might be handled by move_by_pieces. */
+ if (mempcpy (buf4 + 4, buf7, 6) != buf2 + 10
+ || memcmp (buf2, "aBcdRSTUVWklmnopq\0", 19))
+ abort ();
+
+ if (__builtin_mempcpy (buf2 + ++i + 8, buf7 + 1, 1)
+ != buf2 + 11
+ || memcmp (buf2, "aBcdRSTUVWSlmnopq\0", 19)
+ || i != 2)
+ abort ();
+
+ if (mempcpy (buf4 + 14, buf6, 2) != buf2 + 16
+ || memcmp (buf2, "aBcdRSTUVWSlmnrsq\0", 19))
+ abort ();
+
+ __builtin_memcpy (buf4, "aBcdEFghijklmnopq\0", 19);
+
+ /* These should be handled either by movstrendM or mempcpy
+ call. */
+ if (mempcpy (buf4 + 4, buf7, n + 6) != buf2 + 10
+ || memcmp (buf2, "aBcdRSTUVWklmnopq\0", 19))
+ abort ();
+
+ if (__builtin_mempcpy (buf2 + ++i + 8, buf7 + 1, n + 1)
+ != buf2 + 12
+ || memcmp (buf2, "aBcdRSTUVWkSmnopq\0", 19)
+ || i != 3)
+ abort ();
+
+ if (mempcpy (buf4 + 14, buf6, n + 2) != buf2 + 16
+ || memcmp (buf2, "aBcdRSTUVWkSmnrsq\0", 19))
+ abort ();
+
+ /* Now stpcpy tests. */
+ if (stpcpy ((char *) buf3, "abcdefghijklmnop") != (char *) buf1 + 16
+ || memcmp (buf1, "abcdefghijklmnop", 17))
+ abort ();
+
+ if (__builtin_stpcpy ((char *) buf3, "ABCDEFG") != (char *) buf1 + 7
+ || memcmp (buf1, "ABCDEFG\0ijklmnop", 17))
+ abort ();
+
+ if (stpcpy ((char *) buf3 + ++i, "x") != (char *) buf1 + 5
+ || memcmp (buf1, "ABCDx\0G\0ijklmnop", 17))
+ abort ();
+
+ return 0;
+}
+
+int
+main ()
+{
+ __builtin_memcpy (buf5, "RSTUVWXYZ0123456789", 20);
+ __builtin_memcpy (buf7, "RSTUVWXYZ0123456789", 20);
+ return test (buf1, buf2, "rstuvwxyz", 0);
+}
--- gcc/testsuite/gcc.dg/string-opt-1.c.jj 2003-05-21 13:16:51.000000000 -0400
+++ gcc/testsuite/gcc.dg/string-opt-1.c 2003-05-21 13:16:40.000000000 -0400
@@ -0,0 +1,11 @@
+/* Ensure mempcpy is not "optimized" into memcpy followed by addition. */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void *
+fn (char *x, char *y, int z)
+{
+ return __builtin_mempcpy (x, y, z);
+}
+
+/* { dg-final { scan-assembler-not "memcpy" } } */
Jakub