This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Optimize strcpy(p, "abc") and memcpy(p, "abc", 4) (take 2)
- To: rth at redhat dot com
- Subject: [PATCH] Optimize strcpy(p, "abc") and memcpy(p, "abc", 4) (take 2)
- From: Jakub Jelinek <jakub at redhat dot com>
- Date: Sun, 26 Nov 2000 22:51:40 +0100
- Cc: gcc-patches at gcc dot gnu dot org
- Reply-To: Jakub Jelinek <jakub at redhat dot com>
Hi!
This updated patch allows store_by_pieces callback to fail (in
builtin_memcpy case because some constant is not LEGITIMATE_CONSTANT_P as
suggested). Bootstrapped on i386-redhat-linux, no regressions.
The only thing I'm not 100% sure if it is valid to call protect_from_queue
(x, 1) on the same x twice (this could happen if some constant was not
legitimate, store_by_pieces would call protect_from_queue, then started a
sequence, end it, throw the sequence away, then emit_block_move would call it
again).
2000-11-26 Jakub Jelinek <jakub@redhat.com>
* expr.h (store_by_pieces): Add prototype.
* expr.c (struct store_by_pieces): Renamed from clear_by_pieces.
(store_by_pieces): New.
(clear_by_pieces): New.
(clear_by_pieces_1): New.
(store_by_pieces_1): Renamed from clear_by_pieces, handle storing
arbitrary compiler generated constants into memory block.
(store_by_pieces_2): Renamed from clear_by_pieces_1, likewise.
* builtins.c (c_readstr): New.
(builtin_memcpy_read_str): New.
(expand_builtin_memcpy): If src is string constant and
emit_block_move would move it by pieces, compute integer constants
from the string and store it into memory block instead.
* gcc.c-torture/execute/string-opt-6.c: New test.
--- gcc/testsuite/gcc.c-torture/execute/string-opt-6.c.jj Fri Nov 24 15:10:53 2000
+++ gcc/testsuite/gcc.c-torture/execute/string-opt-6.c Sat Nov 25 23:18:42 2000
@@ -0,0 +1,46 @@
+/* Copyright (C) 2000 Free Software Foundation.
+
+ Ensure builtin memcpy and strcpy perform correctly.
+
+ Written by Jakub Jelinek, 11/24/2000. */
+
+extern void abort (void);
+extern char *strcpy (char *, const char *);
+typedef __SIZE_TYPE__ size_t;
+extern void *memcpy (void *, const void *, size_t);
+extern int memcmp (const void *, const void *, size_t);
+
+char p[32] = "";
+
+int main()
+{
+ if (strcpy (p, "abcde") != p || memcmp (p, "abcde", 6))
+ abort ();
+ if (strcpy (p + 16, "vwxyz" + 1) != p + 16 || memcmp (p + 16, "wxyz", 5))
+ abort ();
+ if (strcpy (p + 1, "") != p + 1 || memcmp (p, "a\0cde", 6))
+ abort ();
+ if (strcpy (p + 3, "fghij") != p + 3 || memcmp (p, "a\0cfghij", 9))
+ abort ();
+ if (memcpy (p, "ABCDE", 6) != p || memcmp (p, "ABCDE", 6))
+ abort ();
+ if (memcpy (p + 16, "VWX" + 1, 2) != p + 16 || memcmp (p + 16, "WXyz", 5))
+ abort ();
+ if (memcpy (p + 1, "", 1) != p + 1 || memcmp (p, "A\0CDE", 6))
+ abort ();
+ if (memcpy (p + 3, "FGHI", 4) != p + 3 || memcmp (p, "A\0CFGHIj", 9))
+ abort ();
+
+ return 0;
+}
+
+#ifdef __OPTIMIZE__
+/* When optimizing, all the above cases should be transformed into
+ something else. So any remaining calls to the original function
+ should abort. */
+static char *
+strcpy (char *d, const char *s)
+{
+ abort ();
+}
+#endif
--- gcc/expr.h.jj Mon Nov 6 09:38:24 2000
+++ gcc/expr.h Sat Nov 25 23:18:42 2000
@@ -1010,6 +1010,16 @@ extern void use_group_regs PARAMS ((rtx
alignment. */
extern rtx clear_storage PARAMS ((rtx, rtx, unsigned int));
+/* If storing by pieces is desirable, generate several move instructions to
+ store LEN bytes generated by CONSTFUN to block TO. (A MEM rtx with
+ BLKmode). CONSTFUNDATA is a pointer which will be passed as argument
+ in every CONSTFUN call. ALIGN is maximum alignment we can assume.
+ Return non-zero if it had emitted something. */
+extern int store_by_pieces PARAMS ((rtx, unsigned HOST_WIDE_INT,
+ rtx (*) (PTR, HOST_WIDE_INT,
+ enum machine_mode),
+ PTR, unsigned int));
+
/* Emit insns to set X from Y. */
extern rtx emit_move_insn PARAMS ((rtx, rtx));
--- gcc/expr.c.jj Fri Nov 24 10:47:59 2000
+++ gcc/expr.c Sun Nov 26 02:19:05 2000
@@ -127,10 +127,10 @@ struct move_by_pieces
int reverse;
};
-/* This structure is used by clear_by_pieces to describe the clear to
+/* This structure is used by store_by_pieces to describe the clear to
be performed. */
-struct clear_by_pieces
+struct store_by_pieces
{
rtx to;
rtx to_addr;
@@ -138,6 +138,8 @@ struct clear_by_pieces
int explicit_inc_to;
unsigned HOST_WIDE_INT len;
HOST_WIDE_INT offset;
+ rtx (*constfun) PARAMS ((PTR, HOST_WIDE_INT, enum machine_mode));
+ PTR constfundata;
int reverse;
};
@@ -151,11 +153,15 @@ static unsigned HOST_WIDE_INT move_by_pi
unsigned int));
static void move_by_pieces_1 PARAMS ((rtx (*) (rtx, ...), enum machine_mode,
struct move_by_pieces *));
+static rtx clear_by_pieces_1 PARAMS ((PTR, HOST_WIDE_INT,
+ enum machine_mode));
static void clear_by_pieces PARAMS ((rtx, unsigned HOST_WIDE_INT,
unsigned int));
-static void clear_by_pieces_1 PARAMS ((rtx (*) (rtx, ...),
+static int store_by_pieces_1 PARAMS ((struct store_by_pieces *,
+ unsigned int));
+static int store_by_pieces_2 PARAMS ((rtx (*) (rtx, ...),
enum machine_mode,
- struct clear_by_pieces *));
+ struct store_by_pieces *));
static rtx get_subtarget PARAMS ((rtx));
static int is_zeros_p PARAMS ((tree));
static int mostly_zeros_p PARAMS ((tree));
@@ -2249,6 +2255,40 @@ use_group_regs (call_fusage, regs)
}
}
+/* If storing by pieces is desirable, generate several move instructions to
+ store LEN bytes generated by CONSTFUN to block TO. (A MEM rtx with
+ BLKmode). CONSTFUNDATA is a pointer which will be passed as argument in
+ every CONSTFUN call. ALIGN is maximum alignment we can assume.
+ Return non-zero if it had emitted something. */
+
+int
+store_by_pieces (to, len, constfun, constfundata, align)
+ rtx to;
+ unsigned HOST_WIDE_INT len;
+ rtx (*constfun) PARAMS ((PTR, HOST_WIDE_INT, enum machine_mode));
+ PTR constfundata;
+ unsigned int align;
+{
+ struct store_by_pieces data;
+ rtx insns;
+ int ret;
+
+ if (! MOVE_BY_PIECES_P (len, align))
+ return 0;
+ to = protect_from_queue (to, 1);
+ data.constfun = constfun;
+ data.constfundata = constfundata;
+ data.len = len;
+ data.to = to;
+ start_sequence ();
+ ret = store_by_pieces_1 (&data, align);
+ insns = get_insns ();
+ end_sequence ();
+ if (ret)
+ emit_insns (insns);
+ return ret;
+}
+
/* Generate several move instructions to clear LEN bytes of block TO. (A MEM
rtx with BLKmode). The caller must pass TO through protect_from_queue
before calling. ALIGN is maximum alignment we can assume. */
@@ -2259,31 +2299,58 @@ clear_by_pieces (to, len, align)
unsigned HOST_WIDE_INT len;
unsigned int align;
{
- struct clear_by_pieces data;
- rtx to_addr = XEXP (to, 0);
+ struct store_by_pieces data;
+
+ data.constfun = clear_by_pieces_1;
+ data.constfundata = NULL_PTR;
+ data.len = len;
+ data.to = to;
+ store_by_pieces_1 (&data, align);
+}
+
+/* Callback routine for clear_by_pieces.
+ Return const0_rtx unconditionally. */
+static rtx
+clear_by_pieces_1 (data, offset, mode)
+ PTR data ATTRIBUTE_UNUSED;
+ HOST_WIDE_INT offset ATTRIBUTE_UNUSED;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return const0_rtx;
+}
+
+/* Subroutine of clear_by_pieces and store_by_pieces.
+ Generate several move instructions to store LEN bytes of block TO. (A MEM
+ rtx with BLKmode). The caller must pass TO through protect_from_queue
+ before calling. ALIGN is maximum alignment we can assume. */
+
+static int
+store_by_pieces_1 (data, align)
+ struct store_by_pieces *data;
+ unsigned int align;
+{
+ rtx to_addr = XEXP (data->to, 0);
unsigned HOST_WIDE_INT max_size = MOVE_MAX_PIECES + 1;
enum machine_mode mode = VOIDmode, tmode;
enum insn_code icode;
- data.offset = 0;
- data.to_addr = to_addr;
- data.to = to;
- data.autinc_to
+ data->offset = 0;
+ data->to_addr = to_addr;
+ data->autinc_to
= (GET_CODE (to_addr) == PRE_INC || GET_CODE (to_addr) == PRE_DEC
|| GET_CODE (to_addr) == POST_INC || GET_CODE (to_addr) == POST_DEC);
- data.explicit_inc_to = 0;
- data.reverse
+ data->explicit_inc_to = 0;
+ data->reverse
= (GET_CODE (to_addr) == PRE_DEC || GET_CODE (to_addr) == POST_DEC);
- if (data.reverse)
- data.offset = len;
- data.len = len;
+ if (data->reverse)
+ data->offset = data->len;
- /* If copying requires more than two move insns,
+ /* If storing requires more than two move insns,
copy addresses to registers (to make displacements shorter)
and use post-increment if available. */
- if (!data.autinc_to
- && move_by_pieces_ninsns (len, align) > 2)
+ if (!data->autinc_to
+ && move_by_pieces_ninsns (data->len, align) > 2)
{
/* Determine the main mode we'll be using. */
for (tmode = GET_CLASS_NARROWEST_MODE (MODE_INT);
@@ -2291,30 +2358,30 @@ clear_by_pieces (to, len, align)
if (GET_MODE_SIZE (tmode) < max_size)
mode = tmode;
- if (USE_STORE_PRE_DECREMENT (mode) && data.reverse && ! data.autinc_to)
+ if (USE_STORE_PRE_DECREMENT (mode) && data->reverse && ! data->autinc_to)
{
- data.to_addr = copy_addr_to_reg (plus_constant (to_addr, len));
- data.autinc_to = 1;
- data.explicit_inc_to = -1;
+ data->to_addr = copy_addr_to_reg (plus_constant (to_addr, data->len));
+ data->autinc_to = 1;
+ data->explicit_inc_to = -1;
}
- if (USE_STORE_POST_INCREMENT (mode) && ! data.reverse
- && ! data.autinc_to)
+ if (USE_STORE_POST_INCREMENT (mode) && ! data->reverse
+ && ! data->autinc_to)
{
- data.to_addr = copy_addr_to_reg (to_addr);
- data.autinc_to = 1;
- data.explicit_inc_to = 1;
+ data->to_addr = copy_addr_to_reg (to_addr);
+ data->autinc_to = 1;
+ data->explicit_inc_to = 1;
}
- if ( !data.autinc_to && CONSTANT_P (to_addr))
- data.to_addr = copy_addr_to_reg (to_addr);
+ if ( !data->autinc_to && CONSTANT_P (to_addr))
+ data->to_addr = copy_addr_to_reg (to_addr);
}
if (! SLOW_UNALIGNED_ACCESS (word_mode, align)
|| align > MOVE_MAX * BITS_PER_UNIT || align >= BIGGEST_ALIGNMENT)
align = MOVE_MAX * BITS_PER_UNIT;
- /* First move what we can in the largest integer mode, then go to
+ /* First store what we can in the largest integer mode, then go to
successively smaller modes. */
while (max_size > 1)
@@ -2328,29 +2395,31 @@ clear_by_pieces (to, len, align)
break;
icode = mov_optab->handlers[(int) mode].insn_code;
- if (icode != CODE_FOR_nothing && align >= GET_MODE_ALIGNMENT (mode))
- clear_by_pieces_1 (GEN_FCN (icode), mode, &data);
+ if (icode != CODE_FOR_nothing && align >= GET_MODE_ALIGNMENT (mode)
+ && !store_by_pieces_2 (GEN_FCN (icode), mode, data))
+ return 0;
max_size = GET_MODE_SIZE (mode);
}
/* The code above should have handled everything. */
- if (data.len != 0)
+ if (data->len != 0)
abort ();
+ return 1;
}
-/* Subroutine of clear_by_pieces. Clear as many bytes as appropriate
+/* Subroutine of store_by_pieces_1. Store as many bytes as appropriate
with move instructions for mode MODE. GENFUN is the gen_... function
to make a move insn for that mode. DATA has all the other info. */
-static void
-clear_by_pieces_1 (genfun, mode, data)
+static int
+store_by_pieces_2 (genfun, mode, data)
rtx (*genfun) PARAMS ((rtx, ...));
enum machine_mode mode;
- struct clear_by_pieces *data;
+ struct store_by_pieces *data;
{
unsigned int size = GET_MODE_SIZE (mode);
- rtx to1;
+ rtx to1, cst;
while (data->len >= size)
{
@@ -2367,9 +2436,13 @@ clear_by_pieces_1 (genfun, mode, data)
plus_constant (data->to_addr, data->offset));
if (HAVE_PRE_DECREMENT && data->explicit_inc_to < 0)
- emit_insn (gen_add2_insn (data->to_addr, GEN_INT (-size)));
+ emit_insn (gen_add2_insn (data->to_addr,
+ GEN_INT (-(HOST_WIDE_INT) size)));
- emit_insn ((*genfun) (to1, const0_rtx));
+ cst = (*data->constfun) (data->constfundata, data->offset, mode);
+ if (!cst)
+ return 0;
+ emit_insn ((*genfun) (to1, cst));
if (HAVE_POST_INCREMENT && data->explicit_inc_to > 0)
emit_insn (gen_add2_insn (data->to_addr, GEN_INT (size)));
@@ -2379,6 +2452,7 @@ clear_by_pieces_1 (genfun, mode, data)
data->len -= size;
}
+ return 1;
}
/* Write zeros through the storage of OBJECT. If OBJECT has BLKmode, SIZE is
--- gcc/builtins.c.jj Mon Nov 20 16:27:20 2000
+++ gcc/builtins.c Sat Nov 25 23:18:42 2000
@@ -81,6 +81,7 @@ tree (*lang_type_promotes_to) PARAMS ((t
static int get_pointer_alignment PARAMS ((tree, unsigned));
static tree c_strlen PARAMS ((tree));
static const char *c_getstr PARAMS ((tree));
+static rtx c_readstr PARAMS ((const char *, enum machine_mode));
static rtx get_memory_rtx PARAMS ((tree));
static int apply_args_size PARAMS ((void));
static int apply_result_size PARAMS ((void));
@@ -104,6 +105,8 @@ static rtx expand_builtin_memcmp PARAMS
#endif
static rtx expand_builtin_strcmp PARAMS ((tree, rtx,
enum machine_mode));
+static rtx builtin_memcpy_read_str PARAMS ((PTR, HOST_WIDE_INT,
+ enum machine_mode));
static rtx expand_builtin_memcpy PARAMS ((tree));
static rtx expand_builtin_strcpy PARAMS ((tree));
static rtx expand_builtin_memset PARAMS ((tree));
@@ -304,6 +307,42 @@ c_getstr (src)
return ptr + offset;
}
+/* Return a CONST_INT or CONST_DOUBLE corresponding to target
+ reading GET_MODE_BITSIZE (MODE) bits from string constant
+ STR. */
+static rtx
+c_readstr (str, mode)
+ const char *str;
+ enum machine_mode mode;
+{
+ HOST_WIDE_INT c[2];
+ HOST_WIDE_INT ch;
+ unsigned int i, j;
+
+ if (GET_MODE_CLASS (mode) != MODE_INT)
+ abort ();
+ c[0] = 0;
+ c[1] = 0;
+ ch = 1;
+ for (i = 0; i < GET_MODE_SIZE (mode); i++)
+ {
+ if (!ch)
+ abort (); /* Attempt to read past end of string. */
+ j = i;
+ if (WORDS_BIG_ENDIAN)
+ j = GET_MODE_SIZE (mode) - i - 1;
+ if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN
+ && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+ j = j + UNITS_PER_WORD - 2 * (j % UNITS_PER_WORD) - 1;
+ j *= BITS_PER_UNIT;
+ if (j > 2 * HOST_BITS_PER_WIDE_INT)
+ abort ();
+ ch = (unsigned char) str[i];
+ c[j / HOST_BITS_PER_WIDE_INT] |= ch << (j % HOST_BITS_PER_WIDE_INT);
+ }
+ return immed_double_const (c[0], c[1], mode);
+}
+
/* Given TEM, a pointer to a stack frame, follow the dynamic chain COUNT
times to get the address of either a higher stack frame, or a return
address located within it (depending on FNDECL_CODE). */
@@ -1681,6 +1720,25 @@ expand_builtin_strpbrk (arglist, target,
}
}
+/* Callback routine for store_by_pieces. Read GET_MODE_BITSIZE (MODE)
+ bytes from constant string DATA + OFFSET and return it as target
+ constant. */
+static rtx
+builtin_memcpy_read_str (data, offset, mode)
+ PTR data;
+ HOST_WIDE_INT offset;
+ enum machine_mode mode;
+{
+ const char *str = (const char *) data;
+ rtx cst;
+
+ if (offset + GET_MODE_SIZE (mode) > strlen (str) + 1)
+ abort (); /* Attempt to read past the end of constant string. */
+
+ cst = c_readstr (str + offset, mode);
+ return LEGITIMATE_CONSTANT_P (cst) ? cst : NULL_RTX;
+}
+
/* Expand a call to the memcpy builtin, with arguments in ARGLIST. */
static rtx
expand_builtin_memcpy (arglist)
@@ -1702,6 +1760,7 @@ expand_builtin_memcpy (arglist)
tree dest = TREE_VALUE (arglist);
tree src = TREE_VALUE (TREE_CHAIN (arglist));
tree len = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ const char *src_str;
int src_align = get_pointer_alignment (src, BIGGEST_ALIGNMENT);
int dest_align = get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
@@ -1713,8 +1772,22 @@ expand_builtin_memcpy (arglist)
return 0;
dest_mem = get_memory_rtx (dest);
- src_mem = get_memory_rtx (src);
len_rtx = expand_expr (len, NULL_RTX, VOIDmode, 0);
+ src_str = c_getstr (src);
+
+ /* If SRC is a string constant and block move would be done
+ by pieces, we can avoid loading the string from memory
+ and only stored the computed constants. */
+ if (src_str
+ && !current_function_check_memory_usage
+ && GET_CODE (len_rtx) == CONST_INT
+ && (unsigned HOST_WIDE_INT) INTVAL (len_rtx) <= strlen (src_str) + 1
+ && store_by_pieces (dest_mem, INTVAL (len_rtx),
+ builtin_memcpy_read_str,
+ (PTR) src_str, dest_align))
+ return force_operand (XEXP (dest_mem, 0), NULL_RTX);
+
+ src_mem = get_memory_rtx (src);
/* Just copy the rights of SRC to the rights of DEST. */
if (current_function_check_memory_usage)
Jakub