This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

RE: [PATCH] Fix mips_expand_synci_loop


Richard Sandiford wrote:
> 
> >> >
> >> > You also need:
> >> >
> >> > 	beq	zero, inc, skip_the_whole_thing.
> >> 
> >> And if begin == end is allowed, also:
> >> 
> >>         beq     begin, end, skip_the_whole_thing
> >> 
> >> Three bugs for the price of one.
> >> 
> >
> >   Could we ignore these two tests: (inc == 0) and (begin == end)?
> > The original implementation doesn't have them, and
> > the performance may be affected.
> 
> I think we'd better have them.  David's point was that the ISA spec
> allows processors to have no cache, and in that case, our code would
> wrongly become an infinite loop.  The begin == end check is needed too
> because empty ranges are acceptable in most C interfaces.  We 
> certainly
> don't document that they aren't here.

  Ok.

> 
> > @@ -6697,17 +6697,31 @@
> >  	     ? gen_rdhwr_synci_step_si (inc)
> >  	     : gen_rdhwr_synci_step_di (inc));
> >  
> > +  /* Calculate mask.  */
> > +  mask = gen_reg_rtx (Pmode);
> > +  mips_emit_binary (MINUS, mask, gen_rtx_REG (Pmode, 
> GP_REG_FIRST), inc);
> 
> You need to use NEG here; (minus ($0) inc) isn't canonical rtl.  Add:
> 
> /* Emit an instruction of the form (set TARGET (CODE OP0)).  */
> 
> static void
> mips_emit_unary (enum rtx_code code, rtx target, rtx op0)
> {
>   emit_insn (gen_rtx_SET (VOIDmode, target,
> 			  gen_rtx_fmt_e (code, GET_MODE (op0), op0)));
> }
> 
> /* Compute (CODE OP0) and store the result in a new register 
> of mode MODE.
>    Return that new register.  */
> 
> static rtx
> mips_force_unary (enum machine_mode mode, enum rtx_code code, rtx op0)
> {
>   rtx reg;
> 
>   reg = gen_reg_rtx (mode);
>   mips_emit_unary (code, reg, op0);
>   return reg;
> }
> 
> (before mips_emit_binary) and use:

  Yes.

> 
>   mask = mips_force_unary (Pmode, NEG, inc);
> 
> > +  /* Mask out begin by mask.  */
> > +  mips_emit_binary (AND, begin, begin, mask);
> 
>   begin = mips_force_binary (Pmode, AND, begin, mask);
> 
> > +  /* Calculate length.  */
> > +  length = gen_reg_rtx (Pmode);
> > +  mips_emit_binary (MINUS, length, end, begin);
> 
>   length = mips_force_binary (Pmode, MINUS, end, begin);
> 
> (The point being, we don't want to reuse registers unnecessarily.)
> 
> Looks good otherwise, but I'd like to see the version with the
> changes above, and with the new branches.  The testing you did
> was good, but please run it through the normal gcc testsuite too.
> 

  Yes.  I tested the target of mipsisa32r2-sde-elf, and the
number of unexpected/expected failures are the same without and with my
patch.  The JavaScriptCore tests are ok.

  Ok to commit?  Thanks!

Regards,
Chao-ying

2009-10-28  Chao-ying Fu  <fu@mips.com>

	* config/mips/mips.c (mips_emit_unary, mips_force_unary): New
	functions.
	(mips_expand_synci_loop):  Used the length rtx to control the
	synci loop from the begin rtx that points to the first byte of
	the cache line.


Index: mips.c
===================================================================
--- mips.c	(revision 153617)
+++ mips.c	(working copy)
@@ -2444,6 +2444,28 @@
 	  : emit_move_insn_1 (dest, src));
 }
 
+/* Emit an instruction of the form (set TARGET (CODE OP0)).  */
+
+static void
+mips_emit_unary (enum rtx_code code, rtx target, rtx op0)
+{
+  emit_insn (gen_rtx_SET (VOIDmode, target,
+			  gen_rtx_fmt_e (code, GET_MODE (op0), op0)));
+}
+
+/* Compute (CODE OP0) and store the result in a new register of mode MODE.
+   Return that new register.  */
+
+static rtx
+mips_force_unary (enum machine_mode mode, enum rtx_code code, rtx op0)
+{
+  rtx reg;
+
+  reg = gen_reg_rtx (mode);
+  mips_emit_unary (code, reg, op0);
+  return reg;
+}
+
 /* Emit an instruction of the form (set TARGET (CODE OP0 OP1)).  */
 
 static void
@@ -6689,26 +6711,51 @@
 void
 mips_expand_synci_loop (rtx begin, rtx end)
 {
-  rtx inc, label, cmp, cmp_result;
+  rtx inc, label, end_label, cmp_result, mask, length;
 
+  /* Create end_label.  */
+  end_label = gen_label_rtx ();
+
+  /* Check if begin equals end.  */
+  cmp_result = gen_rtx_EQ (VOIDmode, begin, end);
+  emit_jump_insn (gen_condjump (cmp_result, end_label));
+
   /* Load INC with the cache line size (rdhwr INC,$1).  */
   inc = gen_reg_rtx (Pmode);
   emit_insn (Pmode == SImode
 	     ? gen_rdhwr_synci_step_si (inc)
 	     : gen_rdhwr_synci_step_di (inc));
 
+  /* Check if inc is 0.  */
+  cmp_result = gen_rtx_EQ (VOIDmode, inc, const0_rtx);
+  emit_jump_insn (gen_condjump (cmp_result, end_label));
+
+  /* Calculate mask.  */
+  mask = mips_force_unary (Pmode, NEG, inc);
+
+  /* Mask out begin by mask.  */
+  begin = mips_force_binary (Pmode, AND, begin, mask);
+
+  /* Calculate length.  */
+  length = mips_force_binary (Pmode, MINUS, end, begin);
+
   /* Loop back to here.  */
   label = gen_label_rtx ();
   emit_label (label);
 
   emit_insn (gen_synci (begin));
 
-  cmp = mips_force_binary (Pmode, GTU, begin, end);
+  /* Update length.  */
+  mips_emit_binary (MINUS, length, length, inc);
 
+  /* Update begin.  */
   mips_emit_binary (PLUS, begin, begin, inc);
 
-  cmp_result = gen_rtx_EQ (VOIDmode, cmp, const0_rtx);
+  /* Check if length is greater than 0.  */
+  cmp_result = gen_rtx_GT (VOIDmode, length, const0_rtx);
   emit_jump_insn (gen_condjump (cmp_result, label));
+
+  emit_label (end_label);
 }
 
 /* Expand a QI or HI mode atomic memory operation.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]