This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH, RFC] PowerPC movmem and clrmem
- From: David Edelsohn <dje at watson dot ibm dot com>
- To: Dale Johannesen <dalej at apple dot com>, Geoff Keating <geoffk at geoffk dot org>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Mon, 09 Aug 2004 17:21:45 -0400
- Subject: [PATCH, RFC] PowerPC movmem and clrmem
The discussion about *_BY_PIECES prompted me to investigate the
current state of movmem and clrmem for PowerPC. The PowerPC port did not
define any clrmemMM pattern, so expr.c was defaulting to
clear_by_pieces(), which makes some bad decisions about instruction
selection and alignment.
I also was advised to use the PowerPC lwz even when the alignment
is not known (and not STRICT_ALIGNMENT).
The appended patch defines a clrmemsi pattern so that the rs6000
port can take control of clear_storage() using expand_block_clear(). It
also modifies expand_block_move() to use movsi and movhi for unaligned
accesses.
Comments?
David
* config/rs6000/rs6000-protos.h (expand_block_clear): Declare.
* config/rs6000/rs6000.md (clrmemsi): New pattern.
* config/rs6000/rs6000.c (expand_block_clear): New function.
(expand_block_move): Convert alignment to bits. Use SImode and
HImode for unaligned addresses if not STRICT_ALIGNMENT.
Index: rs6000-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000-protos.h,v
retrieving revision 1.85
diff -c -p -r1.85 rs6000-protos.h
*** rs6000-protos.h 28 Jul 2004 12:13:13 -0000 1.85
--- rs6000-protos.h 9 Aug 2004 20:19:48 -0000
*************** extern int input_operand (rtx, enum mach
*** 86,91 ****
--- 86,92 ----
extern int small_data_operand (rtx, enum machine_mode);
extern int s8bit_cint_operand (rtx, enum machine_mode);
extern bool legitimate_constant_pool_address_p (rtx);
+ extern int expand_block_clear (rtx[]);
extern int expand_block_move (rtx[]);
extern int load_multiple_operation (rtx, enum machine_mode);
extern const char * rs6000_output_load_multiple (rtx[]);
Index: rs6000.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000.md,v
retrieving revision 1.315
diff -c -p -r1.315 rs6000.md
*** rs6000.md 2 Aug 2004 01:46:40 -0000 1.315
--- rs6000.md 9 Aug 2004 20:19:49 -0000
***************
*** 9093,9098 ****
--- 9137,9156 ----
"{stsi|stswi} %2,%1,%O0"
[(set_attr "type" "store")])
+ (define_expand "clrmemsi" ; Block clear; expansion is delegated to expand_block_clear.
+ [(parallel [(set (match_operand:BLK 0 "" "") ; Operand 0: destination block
+ (const_int 0))
+ (use (match_operand:SI 1 "" "")) ; Operand 1: length
+ (use (match_operand:SI 2 "" ""))])] ; Operand 2: alignment
+ ""
+ "
+ {
+ if (expand_block_clear (operands))
+ DONE;
+ else
+ FAIL;
+ }")
+
;; String/block move insn.
;; Argument 0 is the destination
;; Argument 1 is the source
Index: rs6000.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000.c,v
retrieving revision 1.679
diff -c -p -r1.679 rs6000.c
*** rs6000.c 5 Aug 2004 17:05:48 -0000 1.679
--- rs6000.c 9 Aug 2004 20:48:26 -0000
*************** rs6000_init_libfuncs (void)
*** 8253,8258 ****
--- 8268,8358 ----
set_conv_libfunc (sfloat_optab, TFmode, SImode, "_q_itoq");
}
}
+
+
+ /* Expand a block clear operation, and return 1 if successful. Return 0
+ if we should let the compiler generate normal code.
+
+ operands[0] is the destination
+ operands[1] is the length in bytes (expanded inline only if CONST_INT)
+ operands[2] is the alignment in bytes (must be a CONST_INT) */
+
+ int
+ expand_block_clear (rtx operands[])
+ {
+ rtx orig_dest = operands[0];
+ rtx bytes_rtx = operands[1];
+ rtx align_rtx = operands[2];
+ int constp = (GET_CODE (bytes_rtx) == CONST_INT);
+ int align;
+ int bytes;
+ int offset;
+ int clear_bytes;
+
+ /* If this is not a fixed size clear, let the compiler generate normal code */
+ if (! constp)
+ return 0;
+
+ /* If this is not a fixed size alignment, abort */
+ if (GET_CODE (align_rtx) != CONST_INT)
+ abort ();
+ align = INTVAL (align_rtx) * BITS_PER_UNIT; /* from here on, ALIGN is in bits */
+
+ /* Anything to clear? */
+ bytes = INTVAL (bytes_rtx);
+ if (bytes <= 0)
+ return 1;
+
+ if (bytes > (TARGET_POWERPC64 && align >= 32 ? 64 : 32))
+ return 0;
+
+ if (optimize_size && bytes > 16)
+ return 0;
+
+ for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
+ {
+ union {
+ rtx (*mov) (rtx, rtx);
+ } gen_func;
+ enum machine_mode mode = BLKmode;
+ rtx dest;
+
+ if (bytes >= 8 && TARGET_POWERPC64
+ /* 64-bit loads and stores require word-aligned
+ displacements. */
+ && (align >= 64 || (!STRICT_ALIGNMENT && align >= 32)))
+ {
+ clear_bytes = 8;
+ mode = DImode;
+ gen_func.mov = gen_movdi;
+ }
+ else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
+ { /* clear 4 bytes; known-aligned data may use SImode even if STRICT_ALIGNMENT */
+ clear_bytes = 4;
+ mode = SImode;
+ gen_func.mov = gen_movsi;
+ }
+ else if (bytes == 2 && (align >= 16 || !STRICT_ALIGNMENT))
+ { /* clear 2 bytes; known-aligned data may use HImode even if STRICT_ALIGNMENT */
+ clear_bytes = 2;
+ mode = HImode;
+ gen_func.mov = gen_movhi;
+ }
+ else /* clear 1 byte at a time */
+ {
+ clear_bytes = 1;
+ mode = QImode;
+ gen_func.mov = gen_movqi;
+ }
+
+ dest = adjust_address (orig_dest, mode, offset);
+
+ emit_insn ((*gen_func.mov) (dest, const0_rtx));
+ }
+
+ return 1;
+ }
+
/* Expand a block move operation, and return 1 if successful. Return 0
if we should let the compiler generate normal code.
*************** expand_block_move (rtx operands[])
*** 8286,8292 ****
/* If this is not a fixed size alignment, abort */
if (GET_CODE (align_rtx) != CONST_INT)
abort ();
! align = INTVAL (align_rtx);
/* Anything to move? */
bytes = INTVAL (bytes_rtx);
--- 8386,8392 ----
/* If this is not a fixed size alignment, abort */
if (GET_CODE (align_rtx) != CONST_INT)
abort ();
! align = INTVAL (align_rtx) * BITS_PER_UNIT;
/* Anything to move? */
bytes = INTVAL (bytes_rtx);
*************** expand_block_move (rtx operands[])
*** 8346,8352 ****
else if (bytes >= 8 && TARGET_POWERPC64
/* 64-bit loads and stores require word-aligned
displacements. */
! && (align >= 8 || (! STRICT_ALIGNMENT && align >= 4)))
{
move_bytes = 8;
mode = DImode;
--- 8446,8452 ----
else if (bytes >= 8 && TARGET_POWERPC64
/* 64-bit loads and stores require word-aligned
displacements. */
! && (align >= 64 || (!STRICT_ALIGNMENT && align >= 32)))
{
move_bytes = 8;
mode = DImode;
*************** expand_block_move (rtx operands[])
*** 8357,8369 ****
move_bytes = (bytes > 8) ? 8 : bytes;
gen_func.movmemsi = gen_movmemsi_2reg;
}
! else if (bytes >= 4 && (align >= 4 || ! STRICT_ALIGNMENT))
{ /* move 4 bytes */
move_bytes = 4;
mode = SImode;
gen_func.mov = gen_movsi;
}
! else if (bytes == 2 && (align >= 2 || ! STRICT_ALIGNMENT))
{ /* move 2 bytes */
move_bytes = 2;
mode = HImode;
--- 8457,8469 ----
move_bytes = (bytes > 8) ? 8 : bytes;
gen_func.movmemsi = gen_movmemsi_2reg;
}
! else if (bytes >= 4 && !STRICT_ALIGNMENT)
{ /* move 4 bytes */
move_bytes = 4;
mode = SImode;
gen_func.mov = gen_movsi;
}
! else if (bytes == 2 && !STRICT_ALIGNMENT)
{ /* move 2 bytes */
move_bytes = 2;
mode = HImode;