This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Patch to support PowerPC 4xx dlmzb instruction
- From: "Joseph S. Myers" <joseph at codesourcery dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Sun, 12 Mar 2006 22:03:26 +0000 (UTC)
- Subject: Patch to support PowerPC 4xx dlmzb instruction
The IBM PowerPC 405 and 440 processors have an instruction "dlmzb"
(determine left-most zero byte) for implementing string operations
(see e.g.
<http://www-03.ibm.com/chips/power/powerpc/newsletter/sep2003/ppc_process_at_work2.html>
for a strcpy implementation). This patch adds support for this
instruction, and makes GCC use it to inline aligned strlen operations
on those processors.
Because some PowerPC targets are at the limit of available
target_flags bits, we can't just add a new one for this instruction.
However, the table processor_target_table in rs6000_override_options
means that for consistency with other such optional instructions we
should use a target_flags bit for this instruction, so I moved a bit
not used in that table (TARGET_UPDATE) into a separate variable to
make room for the new bit for this instruction.
Tested with no regressions with cross-compilers to
powerpc-ibm-linux-gnu (--with-cpu=440). OK to commit?
2006-03-12 Joseph S. Myers <joseph@codesourcery.com>
* config/rs6000/rs6000.opt (mdlmzb): New option.
(mupdate, mno-update): Use Var not Mask.
* doc/invoke.texi (-mdlmzb): Document.
* config/rs6000/rs6000.c (rs6000_override_options): Enable -mdlmzb
for 405 and 440.
* config/rs6000/rs6000.md: Add dlmzb support for 405 and 440.
2006-03-12 Joseph S. Myers <joseph@codesourcery.com>
* gcc.target/powerpc/405-dlmzb-strlen-1.c,
gcc.target/powerpc/440-dlmzb-strlen-1.c: New tests.
diff -rupN GCC.orig/gcc/config/rs6000/rs6000.c GCC/gcc/config/rs6000/rs6000.c
--- GCC.orig/gcc/config/rs6000/rs6000.c 2006-02-24 16:30:16.000000000 +0000
+++ GCC/gcc/config/rs6000/rs6000.c 2006-03-12 19:58:34.000000000 +0000
@@ -1135,11 +1135,13 @@ rs6000_override_options (const char *def
{"403", PROCESSOR_PPC403,
POWERPC_BASE_MASK | MASK_SOFT_FLOAT | MASK_STRICT_ALIGN},
{"405", PROCESSOR_PPC405,
- POWERPC_BASE_MASK | MASK_SOFT_FLOAT | MASK_MULHW},
- {"405fp", PROCESSOR_PPC405, POWERPC_BASE_MASK | MASK_MULHW},
+ POWERPC_BASE_MASK | MASK_SOFT_FLOAT | MASK_MULHW | MASK_DLMZB},
+ {"405fp", PROCESSOR_PPC405,
+ POWERPC_BASE_MASK | MASK_MULHW | MASK_DLMZB},
{"440", PROCESSOR_PPC440,
- POWERPC_BASE_MASK | MASK_SOFT_FLOAT | MASK_MULHW},
- {"440fp", PROCESSOR_PPC440, POWERPC_BASE_MASK | MASK_MULHW},
+ POWERPC_BASE_MASK | MASK_SOFT_FLOAT | MASK_MULHW | MASK_DLMZB},
+ {"440fp", PROCESSOR_PPC440,
+ POWERPC_BASE_MASK | MASK_MULHW | MASK_DLMZB},
{"505", PROCESSOR_MPCCORE, POWERPC_BASE_MASK},
{"601", PROCESSOR_PPC601,
MASK_POWER | POWERPC_BASE_MASK | MASK_MULTIPLE | MASK_STRING},
@@ -1209,7 +1211,8 @@ rs6000_override_options (const char *def
POWER_MASKS = MASK_POWER | MASK_POWER2 | MASK_MULTIPLE | MASK_STRING,
POWERPC_MASKS = (POWERPC_BASE_MASK | MASK_PPC_GPOPT
| MASK_PPC_GFXOPT | MASK_POWERPC64 | MASK_ALTIVEC
- | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_MULHW)
+ | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_MULHW
+ | MASK_DLMZB)
};
rs6000_init_hard_regno_mode_ok ();
diff -rupN GCC.orig/gcc/config/rs6000/rs6000.md GCC/gcc/config/rs6000/rs6000.md
--- GCC.orig/gcc/config/rs6000/rs6000.md 2006-02-19 14:05:07.000000000 +0000
+++ GCC/gcc/config/rs6000/rs6000.md 2006-03-12 20:02:28.000000000 +0000
@@ -69,6 +69,9 @@
(UNSPEC_CMPXCHG 42)
(UNSPEC_XCHG 43)
(UNSPEC_AND 44)
+ (UNSPEC_DLMZB 45)
+ (UNSPEC_DLMZB_CR 46)
+ (UNSPEC_DLMZB_STRLEN 47)
])
;;
@@ -1343,6 +1346,72 @@
"mullhwu %0, %1, %2"
[(set_attr "type" "imul3")])
+;; IBM 405 and 440 string-search dlmzb instruction support.
+(define_insn "dlmzb"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
+ (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r")
+ (match_operand:SI 2 "gpc_reg_operand" "r")]
+ UNSPEC_DLMZB_CR))
+ (set (match_operand:SI 0 "gpc_reg_operand" "=r")
+ (unspec:SI [(match_dup 1)
+ (match_dup 2)]
+ UNSPEC_DLMZB))]
+ "TARGET_DLMZB"
+ "dlmzb. %0, %1, %2")
+
+(define_expand "strlensi"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "")
+ (unspec:SI [(match_operand:BLK 1 "general_operand" "")
+ (match_operand:QI 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" "")]
+ UNSPEC_DLMZB_STRLEN))
+ (clobber (match_scratch:CC 4 "=x"))]
+ "TARGET_DLMZB && WORDS_BIG_ENDIAN && !optimize_size"
+{
+ rtx result = operands[0];
+ rtx src = operands[1];
+ rtx search_char = operands[2];
+ rtx align = operands[3];
+ rtx addr, scratch_string, word1, word2, scratch_dlmzb;
+ rtx loop_label, end_label, mem, cr0, cond;
+ if (search_char != const0_rtx
+ || GET_CODE (align) != CONST_INT
+ || INTVAL (align) < 8)
+ FAIL;
+ word1 = gen_reg_rtx (SImode);
+ word2 = gen_reg_rtx (SImode);
+ scratch_dlmzb = gen_reg_rtx (SImode);
+ scratch_string = gen_reg_rtx (Pmode);
+ loop_label = gen_label_rtx ();
+ end_label = gen_label_rtx ();
+ addr = force_reg (Pmode, XEXP (src, 0));
+ emit_move_insn (scratch_string, addr);
+ emit_label (loop_label);
+ mem = change_address (src, SImode, scratch_string);
+ emit_move_insn (word1, mem);
+ emit_move_insn (word2, adjust_address (mem, SImode, 4));
+ cr0 = gen_rtx_REG (CCmode, CR0_REGNO);
+ emit_insn (gen_dlmzb (scratch_dlmzb, word1, word2, cr0));
+ cond = gen_rtx_NE (VOIDmode, cr0, const0_rtx);
+ emit_jump_insn (gen_rtx_SET (VOIDmode,
+ pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode,
+ cond,
+ gen_rtx_LABEL_REF
+ (VOIDmode,
+ end_label),
+ pc_rtx)));
+ emit_insn (gen_addsi3 (scratch_string, scratch_string, GEN_INT (8)));
+ emit_jump_insn (gen_rtx_SET (VOIDmode,
+ pc_rtx,
+ gen_rtx_LABEL_REF (VOIDmode, loop_label)));
+ emit_label (end_label);
+ emit_insn (gen_addsi3 (scratch_string, scratch_string, scratch_dlmzb));
+ emit_insn (gen_subsi3 (result, scratch_string, addr));
+ emit_insn (gen_subsi3 (result, result, const1_rtx));
+ DONE;
+})
+
(define_split
[(set (match_operand:CC 2 "cc_reg_not_cr0_operand" "")
(compare:CC (sign_extend:SI (match_operand:HI 1 "gpc_reg_operand" ""))
diff -rupN GCC.orig/gcc/config/rs6000/rs6000.opt GCC/gcc/config/rs6000/rs6000.opt
--- GCC.orig/gcc/config/rs6000/rs6000.opt 2005-11-22 00:32:09.000000000 +0000
+++ GCC/gcc/config/rs6000/rs6000.opt 2006-03-12 20:00:53.000000000 +0000
@@ -72,6 +72,10 @@ mmulhw
Target Report Mask(MULHW)
Use 4xx half-word multiply instructions
+mdlmzb
+Target Report Mask(DLMZB)
+Use 4xx string-search dlmzb instruction
+
mmultiple
Target Report Mask(MULTIPLE)
Generate load/store multiple instructions
@@ -97,11 +101,11 @@ Target Report RejectNegative InverseMask
Use hardware floating point
mno-update
-Target Report RejectNegative Mask(NO_UPDATE)
+Target Report RejectNegative Var(TARGET_UPDATE,0) Init(1)
Do not generate load/store with update instructions
mupdate
-Target Report RejectNegative InverseMask(NO_UPDATE, UPDATE)
+Target Report RejectNegative Var(TARGET_UPDATE,1) VarExists
Generate load/store with update instructions
mno-fused-madd
diff -rupN GCC.orig/gcc/doc/invoke.texi GCC/gcc/doc/invoke.texi
--- GCC.orig/gcc/doc/invoke.texi 2006-03-02 22:56:44.000000000 +0000
+++ GCC/gcc/doc/invoke.texi 2006-03-12 19:57:48.000000000 +0000
@@ -670,6 +670,7 @@ See RS/6000 and PowerPC Options.
-mspe=yes -mspe=no @gol
-mvrsave -mno-vrsave @gol
-mmulhw -mno-mulhw @gol
+-mdlmzb -mno-dlmzb @gol
-mfloat-gprs=yes -mfloat-gprs=no -mfloat-gprs=single -mfloat-gprs=double @gol
-mprototype -mno-prototype @gol
-msim -mmvme -mads -myellowknife -memb -msdata @gol
@@ -11113,7 +11114,7 @@ following options: @option{-maltivec}, @
@option{-mhard-float}, @option{-mmfcrf}, @option{-mmultiple},
@option{-mnew-mnemonics}, @option{-mpopcntb}, @option{-mpower},
@option{-mpower2}, @option{-mpowerpc64}, @option{-mpowerpc-gpopt},
-@option{-mpowerpc-gfxopt}, @option{-mstring}, @option{-mmulhw}.
+@option{-mpowerpc-gfxopt}, @option{-mstring}, @option{-mmulhw}, @option{-mdlmzb}.
The particular options
set for any particular CPU will vary between compiler versions,
depending on what setting seems to produce optimal code for that CPU;
@@ -11377,6 +11378,14 @@ multiply-accumulate instructions on the
These instructions are generated by default when targetting those
processors.
+@item -mdlmzb
+@itemx -mno-dlmzb
+@opindex mdlmzb
+@opindex mno-dlmzb
+Generate code that uses (does not use) the string-search @samp{dlmzb}
+instruction on the IBM 405 and 440 processors. This instruction is
+generated by default when targeting those processors.
+
@item -mno-bit-align
@itemx -mbit-align
@opindex mno-bit-align
diff -rupN GCC.orig/gcc/testsuite/gcc.target/powerpc/405-dlmzb-strlen-1.c GCC/gcc/testsuite/gcc.target/powerpc/405-dlmzb-strlen-1.c
--- GCC.orig/gcc/testsuite/gcc.target/powerpc/405-dlmzb-strlen-1.c 1970-01-01 00:00:00.000000000 +0000
+++ GCC/gcc/testsuite/gcc.target/powerpc/405-dlmzb-strlen-1.c 2006-03-12 19:56:53.000000000 +0000
@@ -0,0 +1,17 @@
+/* Test generation of dlmzb for strlen on 405. */
+/* Origin: Joseph Myers <joseph@codesourcery.com> */
+/* { dg-do compile } */
+/* { dg-require-effective-target ilp32 } */
+/* { dg-options "-O2 -mcpu=405" } */
+
+/* { dg-final { scan-assembler "dlmzb\\. " } } */
+
+typedef __SIZE_TYPE__ size_t;
+
+size_t strlen(const char *);
+
+size_t
+strlen8(const long long *s)
+{
+ return strlen((const char *)s);
+}
diff -rupN GCC.orig/gcc/testsuite/gcc.target/powerpc/440-dlmzb-strlen-1.c GCC/gcc/testsuite/gcc.target/powerpc/440-dlmzb-strlen-1.c
--- GCC.orig/gcc/testsuite/gcc.target/powerpc/440-dlmzb-strlen-1.c 1970-01-01 00:00:00.000000000 +0000
+++ GCC/gcc/testsuite/gcc.target/powerpc/440-dlmzb-strlen-1.c 2006-03-12 19:56:53.000000000 +0000
@@ -0,0 +1,17 @@
+/* Test generation of dlmzb for strlen on 440. */
+/* Origin: Joseph Myers <joseph@codesourcery.com> */
+/* { dg-do compile } */
+/* { dg-require-effective-target ilp32 } */
+/* { dg-options "-O2 -mcpu=440" } */
+
+/* { dg-final { scan-assembler "dlmzb\\. " } } */
+
+typedef __SIZE_TYPE__ size_t;
+
+size_t strlen(const char *);
+
+size_t
+strlen8(const long long *s)
+{
+ return strlen((const char *)s);
+}
--
Joseph S. Myers http://www.srcf.ucam.org/~jsm28/gcc/
jsm@polyomino.org.uk (personal mail)
joseph@codesourcery.com (CodeSourcery mail)
jsm28@gcc.gnu.org (Bugzilla assignments and CCs)