This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

[PATCH] Optimize strcpy(p, "abc") and memcpy(p, "abc", 4) (take 2)


Hi!

This updated patch allows store_by_pieces callback to fail (in
builtin_memcpy case because some constant is not LEGITIMATE_CONSTANT_P as
suggested). Bootstrapped on i386-redhat-linux, no regressions.
The only thing I'm not 100% sure if it is valid to call protect_from_queue
(x, 1) on the same x twice (this could happen if some constant was not
legitimate, store_by_pieces would call protect_from_queue, then started a
sequence, end it, throw the sequence away, then emit_block_move would call it
again).

2000-11-26  Jakub Jelinek  <jakub@redhat.com>

	* expr.h (store_by_pieces): Add prototype.
	* expr.c (struct store_by_pieces): Renamed from clear_by_pieces.
	(store_by_pieces): New.
	(clear_by_pieces): New.
	(clear_by_pieces_1): New.
	(store_by_pieces_1): Renamed from clear_by_pieces, handle storing
	arbitrary compiler generated constants into memory block.
	(store_by_pieces_2): Renamed from clear_by_pieces_1, likewise.
	* builtins.c (c_readstr): New.
	(builtin_memcpy_read_str): New.
	(expand_builtin_memcpy): If src is string constant and
	emit_block_move would move it by pieces, compute integer constants
	from the string and store it into memory block instead.

	* gcc.c-torture/execute/string-opt-6.c: New test.

--- gcc/testsuite/gcc.c-torture/execute/string-opt-6.c.jj	Fri Nov 24 15:10:53 2000
+++ gcc/testsuite/gcc.c-torture/execute/string-opt-6.c	Sat Nov 25 23:18:42 2000
@@ -0,0 +1,46 @@
+/* Copyright (C) 2000  Free Software Foundation.
+
+   Ensure builtin memcpy and strcpy perform correctly.
+
+   Written by Jakub Jelinek, 11/24/2000.  */
+
+extern void abort (void);
+extern char *strcpy (char *, const char *);
+typedef __SIZE_TYPE__ size_t;
+extern void *memcpy (void *, const void *, size_t);
+extern int memcmp (const void *, const void *, size_t);
+
+char p[32] = "";
+
+int main()
+{
+  if (strcpy (p, "abcde") != p || memcmp (p, "abcde", 6))
+    abort ();
+  if (strcpy (p + 16, "vwxyz" + 1) != p + 16 || memcmp (p + 16, "wxyz", 5))
+    abort ();
+  if (strcpy (p + 1, "") != p + 1 || memcmp (p, "a\0cde", 6))
+    abort ();  
+  if (strcpy (p + 3, "fghij") != p + 3 || memcmp (p, "a\0cfghij", 9))
+    abort ();
+  if (memcpy (p, "ABCDE", 6) != p || memcmp (p, "ABCDE", 6))
+    abort ();
+  if (memcpy (p + 16, "VWX" + 1, 2) != p + 16 || memcmp (p + 16, "WXyz", 5))
+    abort ();
+  if (memcpy (p + 1, "", 1) != p + 1 || memcmp (p, "A\0CDE", 6))
+    abort ();  
+  if (memcpy (p + 3, "FGHI", 4) != p + 3 || memcmp (p, "A\0CFGHIj", 9))
+    abort ();
+
+  return 0;
+}
+
+#ifdef __OPTIMIZE__
+/* When optimizing, all the above cases should be transformed into
+   something else.  So any remaining calls to the original function
+   should abort.  */
+static char *
+strcpy (char *d, const char *s)
+{
+  abort ();
+}
+#endif
--- gcc/expr.h.jj	Mon Nov  6 09:38:24 2000
+++ gcc/expr.h	Sat Nov 25 23:18:42 2000
@@ -1010,6 +1010,16 @@ extern void use_group_regs PARAMS ((rtx 
    alignment.  */
 extern rtx clear_storage PARAMS ((rtx, rtx, unsigned int));
 
+/* If storing by pieces is desirable, generate several move instructions to
+   store LEN bytes generated by CONSTFUN to block TO.  (A MEM rtx with
+   BLKmode).  CONSTFUNDATA is a pointer which will be passed as argument
+   in every CONSTFUN call.  ALIGN is maximum alignment we can assume.
+   Return non-zero if it had emitted something.  */
+extern int store_by_pieces PARAMS ((rtx, unsigned HOST_WIDE_INT,
+				    rtx (*) (PTR, HOST_WIDE_INT,
+					     enum machine_mode),
+				    PTR, unsigned int));
+
 /* Emit insns to set X from Y.  */
 extern rtx emit_move_insn PARAMS ((rtx, rtx));
 
--- gcc/expr.c.jj	Fri Nov 24 10:47:59 2000
+++ gcc/expr.c	Sun Nov 26 02:19:05 2000
@@ -127,10 +127,10 @@ struct move_by_pieces
   int reverse;
 };
 
-/* This structure is used by clear_by_pieces to describe the clear to
+/* This structure is used by store_by_pieces to describe the clear to
    be performed.  */
 
-struct clear_by_pieces
+struct store_by_pieces
 {
   rtx to;
   rtx to_addr;
@@ -138,6 +138,8 @@ struct clear_by_pieces
   int explicit_inc_to;
   unsigned HOST_WIDE_INT len;
   HOST_WIDE_INT offset;
+  rtx (*constfun) PARAMS ((PTR, HOST_WIDE_INT, enum machine_mode));
+  PTR constfundata;
   int reverse;
 };
 
@@ -151,11 +153,15 @@ static unsigned HOST_WIDE_INT move_by_pi
 					 unsigned int));
 static void move_by_pieces_1	PARAMS ((rtx (*) (rtx, ...), enum machine_mode,
 					 struct move_by_pieces *));
+static rtx clear_by_pieces_1	PARAMS ((PTR, HOST_WIDE_INT,
+					 enum machine_mode));
 static void clear_by_pieces	PARAMS ((rtx, unsigned HOST_WIDE_INT,
 					 unsigned int));
-static void clear_by_pieces_1	PARAMS ((rtx (*) (rtx, ...),
+static int store_by_pieces_1	PARAMS ((struct store_by_pieces *,
+					 unsigned int));
+static int store_by_pieces_2	PARAMS ((rtx (*) (rtx, ...),
 					 enum machine_mode,
-					 struct clear_by_pieces *));
+					 struct store_by_pieces *));
 static rtx get_subtarget	PARAMS ((rtx));
 static int is_zeros_p		PARAMS ((tree));
 static int mostly_zeros_p	PARAMS ((tree));
@@ -2249,6 +2255,40 @@ use_group_regs (call_fusage, regs)
     }
 }
 
+/* If storing by pieces is desirable, generate several move instructions to
+   store LEN bytes generated by CONSTFUN to block TO.  (A MEM rtx with
+   BLKmode).  CONSTFUNDATA is a pointer which will be passed as argument in
+   every CONSTFUN call.  ALIGN is maximum alignment we can assume.
+   Return non-zero if it had emitted something.  */
+
+int
+store_by_pieces (to, len, constfun, constfundata, align)
+     rtx to;
+     unsigned HOST_WIDE_INT len;
+     rtx (*constfun) PARAMS ((PTR, HOST_WIDE_INT, enum machine_mode));
+     PTR constfundata;
+     unsigned int align;
+{
+  struct store_by_pieces data;
+  rtx insns;
+  int ret;
+
+  if (! MOVE_BY_PIECES_P (len, align))
+    return 0;
+  to = protect_from_queue (to, 1);
+  data.constfun = constfun;
+  data.constfundata = constfundata;
+  data.len = len;
+  data.to = to;
+  start_sequence ();
+  ret = store_by_pieces_1 (&data, align);
+  insns = get_insns ();
+  end_sequence ();
+  if (ret)
+    emit_insns (insns);
+  return ret;
+}
+
 /* Generate several move instructions to clear LEN bytes of block TO.  (A MEM
    rtx with BLKmode).  The caller must pass TO through protect_from_queue
    before calling. ALIGN is maximum alignment we can assume.  */
@@ -2259,31 +2299,58 @@ clear_by_pieces (to, len, align)
      unsigned HOST_WIDE_INT len;
      unsigned int align;
 {
-  struct clear_by_pieces data;
-  rtx to_addr = XEXP (to, 0);
+  struct store_by_pieces data;
+
+  data.constfun = clear_by_pieces_1;
+  data.constfundata = NULL_PTR;
+  data.len = len;
+  data.to = to;
+  store_by_pieces_1 (&data, align);
+}
+
+/* Callback routine for clear_by_pieces.
+   Return const0_rtx unconditionally.  */
+static rtx
+clear_by_pieces_1 (data, offset, mode)
+     PTR data ATTRIBUTE_UNUSED;
+     HOST_WIDE_INT offset ATTRIBUTE_UNUSED;
+     enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+  return const0_rtx;
+}
+
+/* Subroutine of clear_by_pieces and store_by_pieces.
+   Generate several move instructions to store LEN bytes of block TO.  (A MEM
+   rtx with BLKmode).  The caller must pass TO through protect_from_queue
+   before calling. ALIGN is maximum alignment we can assume.  */
+
+static int
+store_by_pieces_1 (data, align)
+     struct store_by_pieces *data;
+     unsigned int align;
+{
+  rtx to_addr = XEXP (data->to, 0);
   unsigned HOST_WIDE_INT max_size = MOVE_MAX_PIECES + 1;
   enum machine_mode mode = VOIDmode, tmode;
   enum insn_code icode;
 
-  data.offset = 0;
-  data.to_addr = to_addr;
-  data.to = to;
-  data.autinc_to
+  data->offset = 0;
+  data->to_addr = to_addr;
+  data->autinc_to
     = (GET_CODE (to_addr) == PRE_INC || GET_CODE (to_addr) == PRE_DEC
        || GET_CODE (to_addr) == POST_INC || GET_CODE (to_addr) == POST_DEC);
 
-  data.explicit_inc_to = 0;
-  data.reverse
+  data->explicit_inc_to = 0;
+  data->reverse
     = (GET_CODE (to_addr) == PRE_DEC || GET_CODE (to_addr) == POST_DEC);
-  if (data.reverse)
-    data.offset = len;
-  data.len = len;
+  if (data->reverse)
+    data->offset = data->len;
 
-  /* If copying requires more than two move insns,
+  /* If storing requires more than two move insns,
      copy addresses to registers (to make displacements shorter)
      and use post-increment if available.  */
-  if (!data.autinc_to
-      && move_by_pieces_ninsns (len, align) > 2)
+  if (!data->autinc_to
+      && move_by_pieces_ninsns (data->len, align) > 2)
     {
       /* Determine the main mode we'll be using.  */
       for (tmode = GET_CLASS_NARROWEST_MODE (MODE_INT);
@@ -2291,30 +2358,30 @@ clear_by_pieces (to, len, align)
 	if (GET_MODE_SIZE (tmode) < max_size)
 	  mode = tmode;
 
-      if (USE_STORE_PRE_DECREMENT (mode) && data.reverse && ! data.autinc_to)
+      if (USE_STORE_PRE_DECREMENT (mode) && data->reverse && ! data->autinc_to)
 	{
-	  data.to_addr = copy_addr_to_reg (plus_constant (to_addr, len));
-	  data.autinc_to = 1;
-	  data.explicit_inc_to = -1;
+	  data->to_addr = copy_addr_to_reg (plus_constant (to_addr, data->len));
+	  data->autinc_to = 1;
+	  data->explicit_inc_to = -1;
 	}
 
-      if (USE_STORE_POST_INCREMENT (mode) && ! data.reverse
-	  && ! data.autinc_to)
+      if (USE_STORE_POST_INCREMENT (mode) && ! data->reverse
+	  && ! data->autinc_to)
 	{
-	  data.to_addr = copy_addr_to_reg (to_addr);
-	  data.autinc_to = 1;
-	  data.explicit_inc_to = 1;
+	  data->to_addr = copy_addr_to_reg (to_addr);
+	  data->autinc_to = 1;
+	  data->explicit_inc_to = 1;
 	}
 
-      if ( !data.autinc_to && CONSTANT_P (to_addr))
-	data.to_addr = copy_addr_to_reg (to_addr);
+      if ( !data->autinc_to && CONSTANT_P (to_addr))
+	data->to_addr = copy_addr_to_reg (to_addr);
     }
 
   if (! SLOW_UNALIGNED_ACCESS (word_mode, align)
       || align > MOVE_MAX * BITS_PER_UNIT || align >= BIGGEST_ALIGNMENT)
     align = MOVE_MAX * BITS_PER_UNIT;
 
-  /* First move what we can in the largest integer mode, then go to
+  /* First store what we can in the largest integer mode, then go to
      successively smaller modes.  */
 
   while (max_size > 1)
@@ -2328,29 +2395,31 @@ clear_by_pieces (to, len, align)
 	break;
 
       icode = mov_optab->handlers[(int) mode].insn_code;
-      if (icode != CODE_FOR_nothing && align >= GET_MODE_ALIGNMENT (mode))
-	clear_by_pieces_1 (GEN_FCN (icode), mode, &data);
+      if (icode != CODE_FOR_nothing && align >= GET_MODE_ALIGNMENT (mode)
+	  && !store_by_pieces_2 (GEN_FCN (icode), mode, data))
+	return 0;
 
       max_size = GET_MODE_SIZE (mode);
     }
 
   /* The code above should have handled everything.  */
-  if (data.len != 0)
+  if (data->len != 0)
     abort ();
+  return 1;
 }
 
-/* Subroutine of clear_by_pieces.  Clear as many bytes as appropriate
+/* Subroutine of store_by_pieces_1.  Store as many bytes as appropriate
    with move instructions for mode MODE.  GENFUN is the gen_... function
    to make a move insn for that mode.  DATA has all the other info.  */
 
-static void
-clear_by_pieces_1 (genfun, mode, data)
+static int
+store_by_pieces_2 (genfun, mode, data)
      rtx (*genfun) PARAMS ((rtx, ...));
      enum machine_mode mode;
-     struct clear_by_pieces *data;
+     struct store_by_pieces *data;
 {
   unsigned int size = GET_MODE_SIZE (mode);
-  rtx to1;
+  rtx to1, cst;
 
   while (data->len >= size)
     {
@@ -2367,9 +2436,13 @@ clear_by_pieces_1 (genfun, mode, data)
 			      plus_constant (data->to_addr, data->offset));
 
       if (HAVE_PRE_DECREMENT && data->explicit_inc_to < 0)
-	emit_insn (gen_add2_insn (data->to_addr, GEN_INT (-size)));
+	emit_insn (gen_add2_insn (data->to_addr,
+				  GEN_INT (-(HOST_WIDE_INT) size)));
 
-      emit_insn ((*genfun) (to1, const0_rtx));
+      cst = (*data->constfun) (data->constfundata, data->offset, mode);
+      if (!cst)
+	return 0;
+      emit_insn ((*genfun) (to1, cst));
 
       if (HAVE_POST_INCREMENT && data->explicit_inc_to > 0)
 	emit_insn (gen_add2_insn (data->to_addr, GEN_INT (size)));
@@ -2379,6 +2452,7 @@ clear_by_pieces_1 (genfun, mode, data)
 
       data->len -= size;
     }
+  return 1;
 }
 
 /* Write zeros through the storage of OBJECT.  If OBJECT has BLKmode, SIZE is
--- gcc/builtins.c.jj	Mon Nov 20 16:27:20 2000
+++ gcc/builtins.c	Sat Nov 25 23:18:42 2000
@@ -81,6 +81,7 @@ tree (*lang_type_promotes_to) PARAMS ((t
 static int get_pointer_alignment	PARAMS ((tree, unsigned));
 static tree c_strlen			PARAMS ((tree));
 static const char *c_getstr		PARAMS ((tree));
+static rtx c_readstr			PARAMS ((const char *, enum machine_mode));
 static rtx get_memory_rtx		PARAMS ((tree));
 static int apply_args_size		PARAMS ((void));
 static int apply_result_size		PARAMS ((void));
@@ -104,6 +105,8 @@ static rtx expand_builtin_memcmp	PARAMS 
 #endif
 static rtx expand_builtin_strcmp	PARAMS ((tree, rtx,
 						 enum machine_mode));
+static rtx builtin_memcpy_read_str	PARAMS ((PTR, HOST_WIDE_INT,
+						 enum machine_mode));
 static rtx expand_builtin_memcpy	PARAMS ((tree));
 static rtx expand_builtin_strcpy	PARAMS ((tree));
 static rtx expand_builtin_memset	PARAMS ((tree));
@@ -304,6 +307,42 @@ c_getstr (src)
   return ptr + offset;
 }
 
+/* Return a CONST_INT or CONST_DOUBLE corresponding to target
+   reading GET_MODE_BITSIZE (MODE) bits from string constant
+   STR.  */
+static rtx
+c_readstr (str, mode)
+     const char *str;
+     enum machine_mode mode;
+{
+  HOST_WIDE_INT c[2];
+  HOST_WIDE_INT ch;
+  unsigned int i, j;
+
+  if (GET_MODE_CLASS (mode) != MODE_INT)
+    abort ();
+  c[0] = 0;
+  c[1] = 0;
+  ch = 1;
+  for (i = 0; i < GET_MODE_SIZE (mode); i++)
+    {
+      if (!ch)
+	abort ();  /* Attempt to read past end of string.  */
+      j = i;
+      if (WORDS_BIG_ENDIAN)
+	j = GET_MODE_SIZE (mode) - i - 1;
+      if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN
+	  && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+	j = j + UNITS_PER_WORD - 2 * (j % UNITS_PER_WORD) - 1;
+      j *= BITS_PER_UNIT;
+      if (j > 2 * HOST_BITS_PER_WIDE_INT)
+	abort ();
+      ch = (unsigned char) str[i];
+      c[j / HOST_BITS_PER_WIDE_INT] |= ch << (j % HOST_BITS_PER_WIDE_INT);
+    }
+  return immed_double_const (c[0], c[1], mode);
+}
+
 /* Given TEM, a pointer to a stack frame, follow the dynamic chain COUNT
    times to get the address of either a higher stack frame, or a return
    address located within it (depending on FNDECL_CODE).  */
@@ -1681,6 +1720,25 @@ expand_builtin_strpbrk (arglist, target,
     }
 }
 
+/* Callback routine for store_by_pieces.  Read GET_MODE_BITSIZE (MODE)
+   bytes from constant string DATA + OFFSET and return it as target
+   constant.  */
+static rtx
+builtin_memcpy_read_str (data, offset, mode)
+     PTR data;
+     HOST_WIDE_INT offset;
+     enum machine_mode mode;
+{
+  const char *str = (const char *) data;
+  rtx cst;
+
+  if (offset + GET_MODE_SIZE (mode) > strlen (str) + 1)
+    abort ();  /* Attempt to read past the end of constant string.  */
+
+  cst = c_readstr (str + offset, mode);
+  return LEGITIMATE_CONSTANT_P (cst) ? cst : NULL_RTX;
+}
+
 /* Expand a call to the memcpy builtin, with arguments in ARGLIST.  */
 static rtx
 expand_builtin_memcpy (arglist)
@@ -1702,6 +1760,7 @@ expand_builtin_memcpy (arglist)
       tree dest = TREE_VALUE (arglist);
       tree src = TREE_VALUE (TREE_CHAIN (arglist));
       tree len = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+      const char *src_str;
 
       int src_align = get_pointer_alignment (src, BIGGEST_ALIGNMENT);
       int dest_align = get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
@@ -1713,8 +1772,22 @@ expand_builtin_memcpy (arglist)
 	return 0;
 
       dest_mem = get_memory_rtx (dest);
-      src_mem = get_memory_rtx (src);
       len_rtx = expand_expr (len, NULL_RTX, VOIDmode, 0);
+      src_str = c_getstr (src);
+
+      /* If SRC is a string constant and block move would be done
+	 by pieces, we can avoid loading the string from memory
+	 and only stored the computed constants.  */
+      if (src_str
+	  && !current_function_check_memory_usage
+	  && GET_CODE (len_rtx) == CONST_INT
+	  && (unsigned HOST_WIDE_INT) INTVAL (len_rtx) <= strlen (src_str) + 1
+	  && store_by_pieces (dest_mem, INTVAL (len_rtx),
+			      builtin_memcpy_read_str,
+			      (PTR) src_str, dest_align))
+	return force_operand (XEXP (dest_mem, 0), NULL_RTX);
+
+      src_mem = get_memory_rtx (src);
 
       /* Just copy the rights of SRC to the rights of DEST.  */
       if (current_function_check_memory_usage)

	Jakub

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]