This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[Committed] S/390: Use the tuned glibc versions of memcpy and memcmp
- From: "Andreas Krebbel" <krebbel at linux dot vnet dot ibm dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 5 Jul 2012 12:50:41 +0200
- Subject: [Committed] S/390: Use the tuned glibc versions of memcpy and memcmp
Hi,
With this patch, the inline versions of memcpy and memcmp will only be
used for constant size parameters up to 64k. In all other cases, glibc
will hopefully be able to select a CPU-optimized (z10 and z196)
version dynamically via ifunc.
Committed to mainline.
Bye,
-Andreas-
2012-07-05 Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
* config/s390/s390-protos.h (s390_expand_movmem)
(s390_expand_cmpmem): Add return value.
* config/s390/s390.c (s390_expand_movmem, s390_expand_cmpmem):
Return FALSE to use the library function in some cases.
* config/s390/s390.md (movmem, cmpmem): Evaluate return value of C
helper functions.
---
gcc/config/s390/s390-protos.h | 4 !!!!
gcc/config/s390/s390.c | 20 ++++++++++++++++!!!!
gcc/config/s390/s390.md | 16 !!!!!!!!!!!!!!!!
3 files changed, 16 insertions(+), 24 modifications(!)
Index: gcc/config/s390/s390.c
===================================================================
*** gcc/config/s390/s390.c.orig
--- gcc/config/s390/s390.c
*************** legitimize_reload_address (rtx ad, enum
*** 3986,3994 ****
/* Emit code to move LEN bytes from DST to SRC. */
! void
s390_expand_movmem (rtx dst, rtx src, rtx len)
{
if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
{
if (INTVAL (len) > 0)
--- 3986,4001 ----
/* Emit code to move LEN bytes from DST to SRC. */
! bool
s390_expand_movmem (rtx dst, rtx src, rtx len)
{
+ /* When tuning for z10 or higher we rely on the Glibc functions to
+ do the right thing. Only for constant lengths up to 64k we will
+ generate inline code. */
+ if (s390_tune >= PROCESSOR_2097_Z10
+ && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
+ return false;
+
if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
{
if (INTVAL (len) > 0)
*************** s390_expand_movmem (rtx dst, rtx src, rt
*** 4080,4085 ****
--- 4087,4093 ----
convert_to_mode (Pmode, count, 1)));
emit_label (end_label);
}
+ return true;
}
/* Emit code to set LEN bytes at DST to VAL.
*************** s390_expand_setmem (rtx dst, rtx len, rt
*** 4218,4229 ****
/* Emit code to compare LEN bytes at OP0 with those at OP1,
and return the result in TARGET. */
! void
s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
{
rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
rtx tmp;
/* As the result of CMPINT is inverted compared to what we need,
we have to swap the operands. */
tmp = op0; op0 = op1; op1 = tmp;
--- 4226,4244 ----
/* Emit code to compare LEN bytes at OP0 with those at OP1,
and return the result in TARGET. */
! bool
s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
{
rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
rtx tmp;
+ /* When tuning for z10 or higher we rely on the Glibc functions to
+ do the right thing. Only for constant lengths up to 64k we will
+ generate inline code. */
+ if (s390_tune >= PROCESSOR_2097_Z10
+ && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
+ return false;
+
/* As the result of CMPINT is inverted compared to what we need,
we have to swap the operands. */
tmp = op0; op0 = op1; op1 = tmp;
*************** s390_expand_cmpmem (rtx target, rtx op0,
*** 4331,4336 ****
--- 4346,4352 ----
emit_insn (gen_cmpint (target, ccreg));
}
+ return true;
}
Index: gcc/config/s390/s390.md
===================================================================
*** gcc/config/s390/s390.md.orig
--- gcc/config/s390/s390.md
***************
*** 2640,2646 ****
(use (match_operand:GPR 2 "general_operand" "")) ; count
(match_operand 3 "" "")]
""
! "s390_expand_movmem (operands[0], operands[1], operands[2]); DONE;")
; Move a block that is up to 256 bytes in length.
; The block length is taken as (operands[2] % 256) + 1.
--- 2640,2651 ----
(use (match_operand:GPR 2 "general_operand" "")) ; count
(match_operand 3 "" "")]
""
! {
! if (s390_expand_movmem (operands[0], operands[1], operands[2]))
! DONE;
! else
! FAIL;
! })
; Move a block that is up to 256 bytes in length.
; The block length is taken as (operands[2] % 256) + 1.
***************
*** 3017,3024 ****
(use (match_operand:SI 3 "general_operand" ""))
(use (match_operand:SI 4 "" ""))]
""
! "s390_expand_cmpmem (operands[0], operands[1],
! operands[2], operands[3]); DONE;")
; Compare a block that is up to 256 bytes in length.
; The block length is taken as (operands[2] % 256) + 1.
--- 3022,3034 ----
(use (match_operand:SI 3 "general_operand" ""))
(use (match_operand:SI 4 "" ""))]
""
! {
! if (s390_expand_cmpmem (operands[0], operands[1],
! operands[2], operands[3]))
! DONE;
! else
! FAIL;
! })
; Compare a block that is up to 256 bytes in length.
; The block length is taken as (operands[2] % 256) + 1.
Index: gcc/config/s390/s390-protos.h
===================================================================
*** gcc/config/s390/s390-protos.h.orig
--- gcc/config/s390/s390-protos.h
*************** extern void s390_reload_symref_address (
*** 80,88 ****
extern void s390_expand_plus_operand (rtx, rtx, rtx);
extern void emit_symbolic_move (rtx *);
extern void s390_load_address (rtx, rtx);
! extern void s390_expand_movmem (rtx, rtx, rtx);
extern void s390_expand_setmem (rtx, rtx, rtx);
! extern void s390_expand_cmpmem (rtx, rtx, rtx, rtx);
extern bool s390_expand_addcc (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
extern bool s390_expand_insv (rtx, rtx, rtx, rtx);
extern void s390_expand_cs_hqi (enum machine_mode, rtx, rtx, rtx, rtx);
--- 80,88 ----
extern void s390_expand_plus_operand (rtx, rtx, rtx);
extern void emit_symbolic_move (rtx *);
extern void s390_load_address (rtx, rtx);
! extern bool s390_expand_movmem (rtx, rtx, rtx);
extern void s390_expand_setmem (rtx, rtx, rtx);
! extern bool s390_expand_cmpmem (rtx, rtx, rtx, rtx);
extern bool s390_expand_addcc (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
extern bool s390_expand_insv (rtx, rtx, rtx, rtx);
extern void s390_expand_cs_hqi (enum machine_mode, rtx, rtx, rtx, rtx);