Patch to allow strlen expander to fail
Zack Weinberg
zack@wolery.cumb.org
Sun Mar 5 18:27:00 GMT 2000
On Sun, Mar 05, 2000 at 06:12:43PM +0100, Jan Hubicka wrote:
> Hi
> This patch adds the strlen libcall and the necessary code to
> expand_builtin_strlen to emit a libcall when the strlen expander fails.
> This avoids problems with the expression being expanded twice.
>
> Together with the strlensi patch it ought to make gcc deal sanely
> with strlen on i386.
>
> Honza
>
> Sat Aug 5 18:10:24 CET 2000 Jan Hubicka <jh@suse.cz>
> * optabs.c (init_optabs): Add strlen_libfunc.
> * expr.h (enum libfunc_index): Add LTI_strlen.
> (strlen_libfunc): New macro.
> * builtins.c (expand_builtin_strlen): Use libcall when
> strlen expander failed.
This is somewhat mangled and has a thinko in it. get_memory_rtx()
returns a MEM, so we mustn't wrap it in another MEM when calling the
strlensi expander. With that changed, and i386.c/i386.md cleaned up a
bit, we get as far as stage2 - and then genattrtab dumps core. :(
stage1/xgcc -Bstage1/ -B/work/inst/i686-pc-linux-gnu/bin/ -DIN_GCC \
-W -Wall -Wtraditional -O2 -g -W -Wall -Wtraditional -DHAVE_CONFIG_H \
-o genattrtab genattrtab.o rtl.o bitmap.o ggc-none.o print-rtl.o \
errors.o obstack.o
./genattrtab /work/src/gcc/gcc/config/i386/i386.md > tmp-attrtab.c
Segmentation fault
make[1]: *** [s-attrtab] Error 139
Program received signal SIGSEGV, Segmentation fault.
0x804b028 in expand_units () at /work/src/gcc/gcc/genattrtab.c:1993
1993 if (op_array[j-1]->ready < op_array[j]->ready)
(gdb) p (*op_array)@num
$2 = {
0x80c16d8, 0x8061864, 0x1, 0x8063b30, 0x8067d04, 0x8067c90, 0x8067c38,
0x806792c, 0x8067948, 0x8067880, 0x806789c, 0x80678b8, 0x80678d4, 0x806780c,
0x80677b4, 0x8067828, 0x80643d8, 0x80643f4, 0x8064410, 0x8064348, 0x8064364,
0x8064380, 0x80642d4, 0x80642f0, 0x806427c, 0x8064208, 0x8064224, 0x806442c,
0x8064464, 0x8064194, 0x80641b0, 0x8064448, 0x8064480, 0x80640cc, 0x8064074,
0x8064000, 0x806401c, 0x8063ec8, 0x8063ee4, 0x8063f00, 0x8063f1c, 0x8063f38,
0x8063f54, 0x8063f70, 0x8063f8c, 0x8063fa8, 0x80639c4, 0x80638c8, 0x806391c,
0x80639e0, 0x8063870, 0x8063900, 0x8063954, 0x806378c, 0x80637c4, 0x8063818,
0x8063a88, 0x8063aa4, 0x8063ac0, 0x8063af8, 0x8063b4c, 0x80640e8, 0x8064104,
0x8064120, 0x806413c, 0x80626fc, 0x8063adc, 0x8063b14, 0x8062734, 0x80626a4,
0x8062718, 0x80637a8, 0x80638e4, 0x80637e0, 0x80637fc, 0x8063a18, 0x8063938,
0x80639fc, 0x8063a34, 0x8063a6c, 0x8063a50
}
(gdb) p i
$3 = 0
(gdb) p j
$4 = 3
(gdb) p op_array[j-1]
$5 = (struct function_unit_op *) 0x1
The patch I'm running with is appended.
zw
===================================================================
Index: builtins.c
--- builtins.c 2000/02/29 02:34:46 1.32
+++ builtins.c 2000/03/06 02:24:08
@@ -1334,6 +1334,7 @@ expand_builtin_strlen (exp, target, mode
return 0;
else
{
+ rtx pat;
tree src = TREE_VALUE (arglist);
tree len = c_strlen (src);
@@ -1377,13 +1378,8 @@ expand_builtin_strlen (exp, target, mode
if (! (*insn_data[(int)icode].operand[0].predicate) (result, insn_mode))
result = gen_reg_rtx (insn_mode);
- src_rtx = memory_address (BLKmode,
- expand_expr (src, NULL_RTX, ptr_mode,
- EXPAND_NORMAL));
+ src_rtx = get_memory_rtx (src);
- if (! (*insn_data[(int)icode].operand[1].predicate) (src_rtx, Pmode))
- src_rtx = copy_to_mode_reg (Pmode, src_rtx);
-
/* Check the string is readable and has an end. */
if (current_function_check_memory_usage)
emit_library_call (chkr_check_str_libfunc, 1, VOIDmode, 2,
@@ -1396,9 +1392,13 @@ expand_builtin_strlen (exp, target, mode
if (! (*insn_data[(int)icode].operand[2].predicate) (char_rtx, char_mode))
char_rtx = copy_to_mode_reg (char_mode, char_rtx);
- emit_insn (GEN_FCN (icode) (result,
- gen_rtx_MEM (BLKmode, src_rtx),
- char_rtx, GEN_INT (align)));
+ pat = GEN_FCN (icode) (result, src_rtx, char_rtx, GEN_INT (align));
+ if (pat)
+ emit_insn (pat);
+ else
+ emit_library_call_value (strlen_libfunc, result, 0,
+ value_mode, 1,
+ XEXP (src_rtx, 0), Pmode);
/* Return the value in the proper mode for this function. */
if (GET_MODE (result) == value_mode)
===================================================================
Index: expr.h
--- expr.h 2000/02/27 21:39:35 1.59
+++ expr.h 2000/03/06 02:24:10
@@ -486,6 +486,7 @@ enum libfunc_index
LTI_bcmp,
LTI_memset,
LTI_bzero,
+ LTI_strlen,
LTI_throw,
LTI_rethrow,
@@ -619,6 +620,7 @@ extern rtx libfunc_table[LTI_MAX];
#define bcmp_libfunc (libfunc_table[LTI_bcmp])
#define memset_libfunc (libfunc_table[LTI_memset])
#define bzero_libfunc (libfunc_table[LTI_bzero])
+#define strlen_libfunc (libfunc_table[LTI_strlen])
#define throw_libfunc (libfunc_table[LTI_throw])
#define rethrow_libfunc (libfunc_table[LTI_rethrow])
===================================================================
Index: genattrtab.c
--- genattrtab.c 2000/02/26 13:50:42 1.71
+++ genattrtab.c 2000/03/06 02:24:11
@@ -1962,6 +1962,7 @@ expand_units ()
unit_num[unit->num] = unit;
unit_ops[unit->num] = op_array = (struct function_unit_op **)
alloca (unit->num_opclasses * sizeof (struct function_unit_op *));
+ memset (op_array, 0, unit->num_opclasses * sizeof (struct function_unit_op *));
for (op = unit->ops; op; op = op->next)
op_array[op->num] = op;
===================================================================
Index: optabs.c
--- optabs.c 2000/02/26 13:55:09 1.63
+++ optabs.c 2000/03/06 02:24:12
@@ -4668,6 +4668,7 @@ init_optabs ()
bcmp_libfunc = init_one_libfunc ("__gcc_bcmp");
memset_libfunc = init_one_libfunc ("memset");
bzero_libfunc = init_one_libfunc ("bzero");
+ strlen_libfunc = init_one_libfunc ("strlen");
throw_libfunc = init_one_libfunc ("__throw");
rethrow_libfunc = init_one_libfunc ("__rethrow");
===================================================================
Index: config/i386/i386.c
--- config/i386/i386.c 2000/03/03 00:54:46 1.141
+++ config/i386/i386.c 2000/03/06 02:24:14
@@ -5481,128 +5481,19 @@ void
ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
rtx out, align_rtx, scratch;
{
- int align;
rtx tmp;
- rtx align_2_label = NULL_RTX;
- rtx align_3_label = NULL_RTX;
- rtx align_4_label = gen_label_rtx ();
- rtx end_0_label = gen_label_rtx ();
+ rtx loop_label = gen_label_rtx ();
rtx mem;
- rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
rtx tmpreg = gen_reg_rtx (SImode);
- align = 0;
- if (GET_CODE (align_rtx) == CONST_INT)
- align = INTVAL (align_rtx);
-
- /* Loop to check 1..3 bytes for null to get an aligned pointer. */
-
- /* Is there a known alignment and is it less than 4? */
- if (align < 4)
- {
- /* Is there a known alignment and is it not 2? */
- if (align != 2)
- {
- align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
- align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
-
- /* Leave just the 3 lower bits. */
- align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
- NULL_RTX, 0, OPTAB_WIDEN);
-
- emit_insn (gen_cmpsi_0 (align_rtx, const0_rtx));
-
- tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
- gen_rtx_LABEL_REF (VOIDmode,
- align_4_label),
- pc_rtx);
- emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
-
- emit_insn (gen_cmpsi_1 (align_rtx, GEN_INT (2)));
-
- tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
- gen_rtx_LABEL_REF (VOIDmode,
- align_2_label),
- pc_rtx);
- emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
-
- tmp = gen_rtx_GTU (VOIDmode, flags, const0_rtx);
- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
- gen_rtx_LABEL_REF (VOIDmode,
- align_3_label),
- pc_rtx);
- emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
- }
- else
- {
- /* Since the alignment is 2, we have to check 2 or 0 bytes;
- check if is aligned to 4 - byte. */
-
- align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
- NULL_RTX, 0, OPTAB_WIDEN);
-
- emit_insn (gen_cmpsi_0 (align_rtx, const0_rtx));
-
- tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
- gen_rtx_LABEL_REF (VOIDmode,
- align_4_label),
- pc_rtx);
- emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
- }
-
- mem = gen_rtx_MEM (QImode, out);
-
- /* Now compare the bytes. */
-
- /* Compare the first n unaligned byte on a byte per byte basis. */
- emit_insn (gen_cmpqi_0 (mem, const0_rtx));
-
- tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
- gen_rtx_LABEL_REF (VOIDmode, end_0_label),
- pc_rtx);
- emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
-
- /* Increment the address. */
- emit_insn (gen_addsi3 (out, out, const1_rtx));
-
- /* Not needed with an alignment of 2 */
- if (align != 2)
- {
- emit_label (align_2_label);
-
- emit_insn (gen_cmpqi_0 (mem, const0_rtx));
-
- tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
- gen_rtx_LABEL_REF (VOIDmode,
- end_0_label),
- pc_rtx);
- emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
-
- emit_insn (gen_addsi3 (out, out, const1_rtx));
-
- emit_label (align_3_label);
- }
-
- emit_insn (gen_cmpqi_0 (mem, const0_rtx));
-
- tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
- gen_rtx_LABEL_REF (VOIDmode, end_0_label),
- pc_rtx);
- emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
-
- emit_insn (gen_addsi3 (out, out, const1_rtx));
- }
-
+ if (GET_CODE (align_rtx) != CONST_INT
+ || INTVAL (align_rtx) < 4)
+ abort ();
+
/* Generate loop to check 4 bytes at a time. It is not a good idea to
align this loop. It gives only huge programs, but does not help to
speed up. */
- emit_label (align_4_label);
+ emit_label (loop_label);
mem = gen_rtx_MEM (SImode, out);
emit_move_insn (scratch, mem);
@@ -5615,7 +5506,8 @@ ix86_expand_strlensi_unroll_1 (out, alig
emit_insn (gen_one_cmplsi2 (scratch, scratch));
emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
- emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 0, align_4_label);
+ emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 0,
+ loop_label);
if (TARGET_CMOVE)
{
@@ -5641,7 +5533,6 @@ ix86_expand_strlensi_unroll_1 (out, alig
gen_rtx_IF_THEN_ELSE (SImode, tmp,
reg,
out)));
-
}
else
{
@@ -5669,8 +5560,6 @@ ix86_expand_strlensi_unroll_1 (out, alig
tmpreg = gen_lowpart (QImode, tmpreg);
emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
-
- emit_label (end_0_label);
}
/* Clear stack slot assignments remembered from previous functions.
===================================================================
Index: config/i386/i386.md
--- config/i386/i386.md 2000/03/01 11:15:52 1.140
+++ config/i386/i386.md 2000/03/06 02:24:15
@@ -8593,36 +8593,21 @@
{
rtx out, addr, eoschar, align, scratch1, scratch2, scratch3;
- /* The generic case of strlen expander is long. Avoid it's
- expanding unless TARGET_INLINE_ALL_STRINGOPS. */
-
- if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
- && !optimize_size
- && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
- FAIL;
-
out = operands[0];
addr = force_reg (Pmode, XEXP (operands[1], 0));
eoschar = operands[2];
align = operands[3];
scratch1 = gen_reg_rtx (SImode);
- if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
- && !optimize_size)
+ if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx)
{
- /* Well it seems that some optimizer does not combine a call like
- foo(strlen(bar), strlen(bar));
- when the move and the subtraction is done here. It does calculate
- the length just once when these instructions are done inside of
- output_strlen_unroll(). But I think since &bar[strlen(bar)] is
- often used and I use one fewer register for the lifetime of
- output_strlen_unroll() this is better. */
+ /* This version is not a win if optimizing for size, or if
+ the string is not properly aligned. */
+ if (optimize <= 1 || optimize_size
+ || GET_CODE (align) != CONST_INT || INTVAL (align) < 4)
+ FAIL;
- if (GET_CODE (align) != CONST_INT || INTVAL (align) < 4)
- emit_move_insn (scratch1, addr);
-
emit_move_insn (out, addr);
-
ix86_expand_strlensi_unroll_1 (out, align, scratch1);
/* strlensi_unroll_1 returns the address of the zero at the end of
More information about the Gcc-patches
mailing list