Patch to allow strlen expander to fail

Zack Weinberg zack@wolery.cumb.org
Sun Mar 5 18:27:00 GMT 2000


On Sun, Mar 05, 2000 at 06:12:43PM +0100, Jan Hubicka wrote:
> Hi
> This patch adds the strlen libcall and the necessary code to
> expand_builtin_strlen to emit a libcall when the strlen expander fails.
> This avoids problems with the expression being expanded twice.
> 
> Together with the strlensi patch it ought to make gcc deal sanely
> with strlen on i386.
> 
> Honza
> 
> Sat Aug  5 18:10:24 CET 2000  Jan Hubicka  <jh@suse.cz>
> 	* optabs.c (init_optabs): Add strlen_libfunc.
> 	* expr.h (enum libfunc_index): Add LTI_strlen.
> 	(strlen_libfunc): New macro.
> 	* builtins.c (expand_builtin_strlen): Use libcall when
> 	strlen expander failed.

This is somewhat mangled and has a thinko in it.  get_memory_rtx()
returns a MEM, so we mustn't wrap it in another MEM when calling the
strlensi expander.  With that changed, and i386.c/i386.md cleaned up a
bit, we get as far as stage2 - and then genattrtab dumps core. :(

stage1/xgcc -Bstage1/ -B/work/inst/i686-pc-linux-gnu/bin/  -DIN_GCC  \
  -W -Wall -Wtraditional -O2 -g -W -Wall -Wtraditional  -DHAVE_CONFIG_H \
  -o genattrtab genattrtab.o rtl.o bitmap.o ggc-none.o print-rtl.o \
  errors.o obstack.o 
./genattrtab /work/src/gcc/gcc/config/i386/i386.md > tmp-attrtab.c
Segmentation fault
make[1]: *** [s-attrtab] Error 139

Program received signal SIGSEGV, Segmentation fault.
0x804b028 in expand_units () at /work/src/gcc/gcc/genattrtab.c:1993
1993              if (op_array[j-1]->ready < op_array[j]->ready)
(gdb)  p (*op_array)@num
$2 = {
  0x80c16d8, 0x8061864, 0x1,       0x8063b30, 0x8067d04, 0x8067c90, 0x8067c38, 
  0x806792c, 0x8067948, 0x8067880, 0x806789c, 0x80678b8, 0x80678d4, 0x806780c, 
  0x80677b4, 0x8067828, 0x80643d8, 0x80643f4, 0x8064410, 0x8064348, 0x8064364, 
  0x8064380, 0x80642d4, 0x80642f0, 0x806427c, 0x8064208, 0x8064224, 0x806442c, 
  0x8064464, 0x8064194, 0x80641b0, 0x8064448, 0x8064480, 0x80640cc, 0x8064074, 
  0x8064000, 0x806401c, 0x8063ec8, 0x8063ee4, 0x8063f00, 0x8063f1c, 0x8063f38, 
  0x8063f54, 0x8063f70, 0x8063f8c, 0x8063fa8, 0x80639c4, 0x80638c8, 0x806391c, 
  0x80639e0, 0x8063870, 0x8063900, 0x8063954, 0x806378c, 0x80637c4, 0x8063818, 
  0x8063a88, 0x8063aa4, 0x8063ac0, 0x8063af8, 0x8063b4c, 0x80640e8, 0x8064104, 
  0x8064120, 0x806413c, 0x80626fc, 0x8063adc, 0x8063b14, 0x8062734, 0x80626a4, 
  0x8062718, 0x80637a8, 0x80638e4, 0x80637e0, 0x80637fc, 0x8063a18, 0x8063938, 
  0x80639fc, 0x8063a34, 0x8063a6c, 0x8063a50
}
(gdb) p i
$3 = 0
(gdb) p j
$4 = 3
(gdb) p op_array[j-1]
$5 = (struct function_unit_op *) 0x1

The patch I'm running with is appended.

zw

===================================================================
Index: builtins.c
--- builtins.c	2000/02/29 02:34:46	1.32
+++ builtins.c	2000/03/06 02:24:08
@@ -1334,6 +1334,7 @@ expand_builtin_strlen (exp, target, mode
     return 0;
   else
     {
+      rtx pat;
       tree src = TREE_VALUE (arglist);
       tree len = c_strlen (src);
 
@@ -1377,13 +1378,8 @@ expand_builtin_strlen (exp, target, mode
 
       if (! (*insn_data[(int)icode].operand[0].predicate) (result, insn_mode))
 	result = gen_reg_rtx (insn_mode);
-      src_rtx = memory_address (BLKmode,
-				expand_expr (src, NULL_RTX, ptr_mode,
-					     EXPAND_NORMAL));
+      src_rtx = get_memory_rtx (src);
 
-      if (! (*insn_data[(int)icode].operand[1].predicate) (src_rtx, Pmode))
-	src_rtx = copy_to_mode_reg (Pmode, src_rtx);
-
       /* Check the string is readable and has an end.  */
       if (current_function_check_memory_usage)
 	emit_library_call (chkr_check_str_libfunc, 1, VOIDmode, 2,
@@ -1396,9 +1392,13 @@ expand_builtin_strlen (exp, target, mode
       if (! (*insn_data[(int)icode].operand[2].predicate) (char_rtx, char_mode))
 	char_rtx = copy_to_mode_reg (char_mode, char_rtx);
 
-      emit_insn (GEN_FCN (icode) (result,
-				  gen_rtx_MEM (BLKmode, src_rtx),
-				  char_rtx, GEN_INT (align)));
+      pat = GEN_FCN (icode) (result, src_rtx, char_rtx, GEN_INT (align));
+      if (pat)
+	emit_insn (pat);
+      else
+	emit_library_call_value (strlen_libfunc, result, 0,
+				 value_mode, 1,
+				 XEXP (src_rtx, 0), Pmode);
 
       /* Return the value in the proper mode for this function.  */
       if (GET_MODE (result) == value_mode)
===================================================================
Index: expr.h
--- expr.h	2000/02/27 21:39:35	1.59
+++ expr.h	2000/03/06 02:24:10
@@ -486,6 +486,7 @@ enum libfunc_index
   LTI_bcmp,
   LTI_memset,
   LTI_bzero,
+  LTI_strlen,
 
   LTI_throw,
   LTI_rethrow,
@@ -619,6 +620,7 @@ extern rtx libfunc_table[LTI_MAX];
 #define bcmp_libfunc	(libfunc_table[LTI_bcmp])
 #define memset_libfunc	(libfunc_table[LTI_memset])
 #define bzero_libfunc	(libfunc_table[LTI_bzero])
+#define strlen_libfunc	(libfunc_table[LTI_strlen])
 
 #define throw_libfunc	(libfunc_table[LTI_throw])
 #define rethrow_libfunc	(libfunc_table[LTI_rethrow])
===================================================================
Index: genattrtab.c
--- genattrtab.c	2000/02/26 13:50:42	1.71
+++ genattrtab.c	2000/03/06 02:24:11
@@ -1962,6 +1962,7 @@ expand_units ()
       unit_num[unit->num] = unit;
       unit_ops[unit->num] = op_array = (struct function_unit_op **)
 	alloca (unit->num_opclasses * sizeof (struct function_unit_op *));
+      memset (op_array, 0, unit->num_opclasses * sizeof (struct function_unit_op *));
 
       for (op = unit->ops; op; op = op->next)
 	op_array[op->num] = op;
===================================================================
Index: optabs.c
--- optabs.c	2000/02/26 13:55:09	1.63
+++ optabs.c	2000/03/06 02:24:12
@@ -4668,6 +4668,7 @@ init_optabs ()
   bcmp_libfunc = init_one_libfunc ("__gcc_bcmp");
   memset_libfunc = init_one_libfunc ("memset");
   bzero_libfunc = init_one_libfunc ("bzero");
+  strlen_libfunc = init_one_libfunc ("strlen");
 
   throw_libfunc = init_one_libfunc ("__throw");
   rethrow_libfunc = init_one_libfunc ("__rethrow");
===================================================================
Index: config/i386/i386.c
--- config/i386/i386.c	2000/03/03 00:54:46	1.141
+++ config/i386/i386.c	2000/03/06 02:24:14
@@ -5481,128 +5481,19 @@ void
 ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
      rtx out, align_rtx, scratch;
 {
-  int align;
   rtx tmp;
-  rtx align_2_label = NULL_RTX;
-  rtx align_3_label = NULL_RTX;
-  rtx align_4_label = gen_label_rtx ();
-  rtx end_0_label = gen_label_rtx ();
+  rtx loop_label = gen_label_rtx ();
   rtx mem;
-  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
   rtx tmpreg = gen_reg_rtx (SImode);
 
-  align = 0;
-  if (GET_CODE (align_rtx) == CONST_INT)
-    align = INTVAL (align_rtx);
-
-  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
-
-  /* Is there a known alignment and is it less than 4?  */
-  if (align < 4)
-    {
-      /* Is there a known alignment and is it not 2? */
-      if (align != 2)
-	{
-	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
-	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
-
-	  /* Leave just the 3 lower bits.  */
-	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
-				    NULL_RTX, 0, OPTAB_WIDEN);
-
-	  emit_insn (gen_cmpsi_0 (align_rtx, const0_rtx));
-
-	  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
-	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 
-				      gen_rtx_LABEL_REF (VOIDmode,
-							 align_4_label),
-				      pc_rtx);
-	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
-
-	  emit_insn (gen_cmpsi_1 (align_rtx, GEN_INT (2)));
-
-	  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
-	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 
-				      gen_rtx_LABEL_REF (VOIDmode,
-							 align_2_label),
-				      pc_rtx);
-	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
-
-	  tmp = gen_rtx_GTU (VOIDmode, flags, const0_rtx);
-	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 
-				      gen_rtx_LABEL_REF (VOIDmode,
-							 align_3_label),
-				      pc_rtx);
-	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
-	}
-      else
-        {
-	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
-	     check if is aligned to 4 - byte.  */
-
-	  align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
-				    NULL_RTX, 0, OPTAB_WIDEN);
-
-	  emit_insn (gen_cmpsi_0 (align_rtx, const0_rtx));
-
-	  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
-	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 
-				      gen_rtx_LABEL_REF (VOIDmode,
-							 align_4_label),
-				      pc_rtx);
-	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
-        }
-
-      mem = gen_rtx_MEM (QImode, out);
-
-      /* Now compare the bytes.  */
-
-      /* Compare the first n unaligned byte on a byte per byte basis. */
-      emit_insn (gen_cmpqi_0 (mem, const0_rtx));
-
-      tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
-      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 
-				  gen_rtx_LABEL_REF (VOIDmode, end_0_label),
-				  pc_rtx);
-      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
-
-      /* Increment the address. */
-      emit_insn (gen_addsi3 (out, out, const1_rtx));
-
-      /* Not needed with an alignment of 2 */
-      if (align != 2)
-	{
-	  emit_label (align_2_label);
-
-	  emit_insn (gen_cmpqi_0 (mem, const0_rtx));
-
-	  tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
-	  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 
-				      gen_rtx_LABEL_REF (VOIDmode,
-							 end_0_label),
-				      pc_rtx);
-	  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
-
-	  emit_insn (gen_addsi3 (out, out, const1_rtx));
-
-	  emit_label (align_3_label);
-	}
-
-      emit_insn (gen_cmpqi_0 (mem, const0_rtx));
-
-      tmp = gen_rtx_EQ (VOIDmode, flags, const0_rtx);
-      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 
-				  gen_rtx_LABEL_REF (VOIDmode, end_0_label),
-				  pc_rtx);
-      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
-
-      emit_insn (gen_addsi3 (out, out, const1_rtx));
-    }
-
+  if (GET_CODE (align_rtx) != CONST_INT
+      || INTVAL (align_rtx) < 4)
+    abort ();
+  
   /* Generate loop to check 4 bytes at a time.  It is not a good idea to
      align this loop.  It gives only huge programs, but does not help to
      speed up.  */
-  emit_label (align_4_label);
+  emit_label (loop_label);
 
   mem = gen_rtx_MEM (SImode, out);
   emit_move_insn (scratch, mem);
@@ -5615,7 +5506,8 @@ ix86_expand_strlensi_unroll_1 (out, alig
   emit_insn (gen_one_cmplsi2 (scratch, scratch));
   emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
   emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
-  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 0, align_4_label);
+  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 0,
+			   loop_label);
 
   if (TARGET_CMOVE)
     {
@@ -5641,7 +5533,6 @@ ix86_expand_strlensi_unroll_1 (out, alig
 			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
 				       		     reg,
 				       		     out)));
-
     }
   else
     {
@@ -5669,8 +5560,6 @@ ix86_expand_strlensi_unroll_1 (out, alig
   tmpreg = gen_lowpart (QImode, tmpreg);
   emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
   emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
-
-  emit_label (end_0_label);
 }
 
 /* Clear stack slot assignments remembered from previous functions.
===================================================================
Index: config/i386/i386.md
--- config/i386/i386.md	2000/03/01 11:15:52	1.140
+++ config/i386/i386.md	2000/03/06 02:24:15
@@ -8593,36 +8593,21 @@
 {
   rtx out, addr, eoschar, align, scratch1, scratch2, scratch3;
 
-  /* The generic case of strlen expander is long.  Avoid it's
-     expanding unless TARGET_INLINE_ALL_STRINGOPS.  */
-
-  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
-      && !optimize_size
-      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
-    FAIL;
-
   out = operands[0];
   addr = force_reg (Pmode, XEXP (operands[1], 0));
   eoschar = operands[2];
   align = operands[3];
   scratch1 = gen_reg_rtx (SImode);
 
-  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
-      && !optimize_size)
+  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx)
     {
-      /* Well it seems that some optimizer does not combine a call like
-	     foo(strlen(bar), strlen(bar));
-	 when the move and the subtraction is done here.  It does calculate
-	 the length just once when these instructions are done inside of
-	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
-	 often used and I use one fewer register for the lifetime of
-	 output_strlen_unroll() this is better.  */
+      /* This version is not a win if optimizing for size, or if
+	 the string is not properly aligned.  */
+      if (optimize <= 1 || optimize_size
+	  || GET_CODE (align) != CONST_INT || INTVAL (align) < 4)
+	FAIL;
 
-      if (GET_CODE (align) != CONST_INT || INTVAL (align) < 4)
-	emit_move_insn (scratch1, addr);
-
       emit_move_insn (out, addr);
-
       ix86_expand_strlensi_unroll_1 (out, align, scratch1);
 
       /* strlensi_unroll_1 returns the address of the zero at the end of


More information about the Gcc-patches mailing list