This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix *_BY_PIECES_P


On Fri, Aug 06, 2004 at 01:54:32PM -0700, James E Wilson wrote:
> Jakub Jelinek wrote:
> >I think best would be to add a max_size argument to this function,
> >which would be MOVE_MAX_PIECES + 1 or STORE_MAX_PIECES + 1 depending
> >on in which macro or routine it is used.
> 
> I think all of the uses of MOVE_MAX in expr.c are pointless, since we 
> will never consider anything bigger than MOVE_MAX_PIECES.  However, 
> since we have STORE_MAX_PIECES, and it may be different than 
> MOVE_MAX_PIECES, I think your suggestion is right.  We need to pass in 
> the appropriate *_MAX_PIECES value to move_by_pieces_ninsns.
> 
> >Such change would need perhaps some corrections to i386, x86_64 and SH
> >MOVE_RATIO/CLEAR_RATIO/MOVE_BY_PIECES_P settings.
> 
> The x86 CLEAR_RATIO should perhaps be the same as the x86 MOVE_RATIO. 
> And of course the MOVE_BY_PIECES_P macro in the sh port has to change if 
> we add another option to move_by_pieces_ninsns.  Otherwise, it isn't 
> clear that anything else has to change.

The following patch does that.
I have added a 5 insn cap for bzero on i386, because GCC doesn't optimize
many identical constant arguments by loading the constant into a register
and using that register in the instructions.
For a very small number of instructions that isn't worthwhile, but e.g.
movl $0, (%edi)
movl $0, 4(%edi)
movl $0, 8(%edi)
movl $0, 12(%edi)
movl $0, 16(%edi)
movl $0, 20(%edi)
movl $0, 24(%edi)
movl $0, 28(%edi)
is already much bigger and also slightly slower on e.g. P4 than:
xorl %eax, %eax
movl %eax, (%edi)
movl %eax, 4(%edi)
movl %eax, 8(%edi)
movl %eax, 12(%edi)
movl %eax, 16(%edi)
movl %eax, 20(%edi)
movl %eax, 24(%edi)
movl %eax, 28(%edi)

Ok to commit?

2004-08-09  Jakub Jelinek  <jakub@redhat.com>

	PR target/3144
	* expr.c (move_by_pieces_ninsns): Add max_size argument.
	(MOVE_BY_PIECES_P): Pass MOVE_MAX_PIECES + 1 to it.
	(CLEAR_BY_PIECES_P): Pass STORE_MAX_PIECES + 1 to it.
	(STORE_BY_PIECES_P): Define similarly to CLEAR_BY_PIECES_P,
	but using MOVE_RATIO.
	(move_by_pieces): Pass max_size to move_by_pieces_ninsns.
	(can_store_by_pieces): Change max_size type to unsigned int.
	(store_by_pieces_1): Likewise.  Pass max_size to
	move_by_pieces_ninsns.
	* config/s390/s390.h (STORE_BY_PIECES_P): Define.
	* config/sh/sh.h (MOVE_BY_PIECES_P): Pass MOVE_MAX_PIECES + 1
	to move_by_pieces_ninsns.
	(STORE_BY_PIECES_P): Define.
	* config/ns32k/ns32k.h (STORE_BY_PIECES_P): Pass STORE_MAX_PIECES + 1
	to move_by_pieces_ninsns.
	* doc/tm.texi (STORE_BY_PIECES_P): Document changed default.

	* config/i386/i386.h (CLEAR_RATIO): Define.

--- gcc/config/s390/s390.h.jj	2004-07-21 17:04:56.000000000 +0200
+++ gcc/config/s390/s390.h	2004-08-06 14:35:59.000000000 +0200
@@ -855,6 +855,11 @@ extern struct rtx_def *s390_compare_op0,
   ( (SIZE) == 1 || (SIZE) == 2 || (SIZE) == 4	\
     || (TARGET_64BIT && (SIZE) == 8) )
 
+/* This macro is used to determine whether store_by_pieces should be
+   called to "memset" storage with byte values other than zero, or
+   to "memcpy" storage when the source is a constant string.  */
+#define STORE_BY_PIECES_P(SIZE, ALIGN) MOVE_BY_PIECES_P (SIZE, ALIGN)
+
 /* Don't perform CSE on function addresses.  */
 #define NO_FUNCTION_CSE
 
--- gcc/config/sh/sh.h.jj	2004-08-05 12:06:10.000000000 +0200
+++ gcc/config/sh/sh.h	2004-08-06 14:00:06.000000000 +0200
@@ -2368,9 +2368,13 @@ struct sh_args {
 #define USE_STORE_PRE_DECREMENT(mode)    ((mode == SImode || mode == DImode) \
                                            ? 0 : TARGET_SH1)
 
-#define MOVE_BY_PIECES_P(SIZE, ALIGN)  (move_by_pieces_ninsns (SIZE, ALIGN) \
-                                        < (TARGET_SMALLCODE ? 2 :           \
-                                           ((ALIGN >= 32) ? 16 : 2)))
+#define MOVE_BY_PIECES_P(SIZE, ALIGN) \
+  (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \
+   < (TARGET_SMALLCODE ? 2 : ((ALIGN >= 32) ? 16 : 2)))
+
+#define STORE_BY_PIECES_P(SIZE, ALIGN) \
+  (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
+   < (TARGET_SMALLCODE ? 2 : ((ALIGN >= 32) ? 16 : 2)))
 
 /* Macros to check register numbers against specific register classes.  */
 
--- gcc/config/i386/i386.h.jj	2004-08-02 21:22:59.000000000 +0200
+++ gcc/config/i386/i386.h	2004-08-06 14:53:31.024356276 +0200
@@ -2518,6 +2518,12 @@ enum ix86_builtins
 
 #define MOVE_RATIO (optimize_size ? 3 : ix86_cost->move_ratio)
 
+/* If a clear memory operation would take CLEAR_RATIO or more simple
+   move-instruction sequences, we will do a clrmem or libcall instead.  */
+
+#define CLEAR_RATIO (optimize_size ? 2 \
+		     : ix86_cost->move_ratio > 6 ? 6 : ix86_cost->move_ratio)
+
 /* Define if shifts truncate the shift count
    which implies one can omit a sign-extension or zero-extension
    of a shift count.  */
--- gcc/config/ns32k/ns32k.h.jj	2004-07-09 13:51:13.000000000 +0200
+++ gcc/config/ns32k/ns32k.h	2004-08-06 14:01:28.702834441 +0200
@@ -1109,8 +1109,8 @@ __transfer_from_trampoline ()		\
 
 #define STORE_RATIO (optimize_size ? 3 : 15)
 #define STORE_BY_PIECES_P(SIZE, ALIGN) \
-  (move_by_pieces_ninsns (SIZE, ALIGN) < (unsigned int) STORE_RATIO)
-
+  (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
+   < (unsigned int) STORE_RATIO)
 
 /* Nonzero if access to memory by bytes is slow and undesirable.  */
 #define SLOW_BYTE_ACCESS 0
--- gcc/expr.c.jj	2004-08-05 12:05:59.000000000 +0200
+++ gcc/expr.c	2004-08-06 14:56:10.596289966 +0200
@@ -120,6 +120,7 @@ struct store_by_pieces
 };
 
 static unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT,
+						     unsigned int,
 						     unsigned int);
 static void move_by_pieces_1 (rtx (*) (rtx, ...), enum machine_mode,
 			      struct move_by_pieces *);
@@ -175,21 +176,25 @@ static bool float_extend_from_mem[NUM_MA
    to perform a structure copy.  */
 #ifndef MOVE_BY_PIECES_P
 #define MOVE_BY_PIECES_P(SIZE, ALIGN) \
-  (move_by_pieces_ninsns (SIZE, ALIGN) < (unsigned int) MOVE_RATIO)
+  (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \
+   < (unsigned int) MOVE_RATIO)
 #endif
 
 /* This macro is used to determine whether clear_by_pieces should be
    called to clear storage.  */
 #ifndef CLEAR_BY_PIECES_P
 #define CLEAR_BY_PIECES_P(SIZE, ALIGN) \
-  (move_by_pieces_ninsns (SIZE, ALIGN) < (unsigned int) CLEAR_RATIO)
+  (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
+   < (unsigned int) CLEAR_RATIO)
 #endif
 
 /* This macro is used to determine whether store_by_pieces should be
    called to "memset" storage with byte values other than zero, or
    to "memcpy" storage when the source is a constant string.  */
 #ifndef STORE_BY_PIECES_P
-#define STORE_BY_PIECES_P(SIZE, ALIGN)	MOVE_BY_PIECES_P (SIZE, ALIGN)
+#define STORE_BY_PIECES_P(SIZE, ALIGN) \
+  (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
+   < (unsigned int) MOVE_RATIO)
 #endif
 
 /* This array records the insn_code of insns to perform block moves.  */
@@ -883,7 +888,7 @@ move_by_pieces (rtx to, rtx from, unsign
      copy addresses to registers (to make displacements shorter)
      and use post-increment if available.  */
   if (!(data.autinc_from && data.autinc_to)
-      && move_by_pieces_ninsns (len, align) > 2)
+      && move_by_pieces_ninsns (len, align, max_size) > 2)
     {
       /* Find the mode of the largest move...  */
       for (tmode = GET_CLASS_NARROWEST_MODE (MODE_INT);
@@ -984,10 +989,10 @@ move_by_pieces (rtx to, rtx from, unsign
    ALIGN (in bits) is maximum alignment we can assume.  */
 
 static unsigned HOST_WIDE_INT
-move_by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align)
+move_by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align,
+		       unsigned int max_size)
 {
   unsigned HOST_WIDE_INT n_insns = 0;
-  unsigned HOST_WIDE_INT max_size = MOVE_MAX + 1;
 
   if (! SLOW_UNALIGNED_ACCESS (word_mode, align)
       || align > MOVE_MAX * BITS_PER_UNIT || align >= BIGGEST_ALIGNMENT)
@@ -1981,7 +1986,8 @@ can_store_by_pieces (unsigned HOST_WIDE_
 		     rtx (*constfun) (void *, HOST_WIDE_INT, enum machine_mode),
 		     void *constfundata, unsigned int align)
 {
-  unsigned HOST_WIDE_INT max_size, l;
+  unsigned HOST_WIDE_INT l;
+  unsigned int max_size;
   HOST_WIDE_INT offset = 0;
   enum machine_mode mode, tmode;
   enum insn_code icode;
@@ -2149,7 +2155,7 @@ store_by_pieces_1 (struct store_by_piece
 		   unsigned int align ATTRIBUTE_UNUSED)
 {
   rtx to_addr = XEXP (data->to, 0);
-  unsigned HOST_WIDE_INT max_size = STORE_MAX_PIECES + 1;
+  unsigned int max_size = STORE_MAX_PIECES + 1;
   enum machine_mode mode = VOIDmode, tmode;
   enum insn_code icode;
 
@@ -2169,7 +2175,7 @@ store_by_pieces_1 (struct store_by_piece
      copy addresses to registers (to make displacements shorter)
      and use post-increment if available.  */
   if (!data->autinc_to
-      && move_by_pieces_ninsns (data->len, align) > 2)
+      && move_by_pieces_ninsns (data->len, align, max_size) > 2)
     {
       /* Determine the main mode we'll be using.  */
       for (tmode = GET_CLASS_NARROWEST_MODE (MODE_INT);
--- gcc/doc/tm.texi.jj	2004-08-05 12:06:22.000000000 +0200
+++ gcc/doc/tm.texi	2004-08-06 14:04:40.212139269 +0200
@@ -5399,7 +5399,8 @@ used to set a chunk of memory to a const
 mechanism will be used.  Used by @code{__builtin_memset} when storing
 values other than constant zero and by @code{__builtin_strcpy} when
 when called with a constant source string.
-Defaults to @code{MOVE_BY_PIECES_P}.
+Defaults to 1 if @code{move_by_pieces_ninsns} returns less
+than @code{MOVE_RATIO}.
 @end defmac
 
 @defmac USE_LOAD_POST_INCREMENT (@var{mode})


	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]