[PATCH] Fix PR optimization/5999

Jakub Jelinek jakub@redhat.com
Tue Mar 19 15:52:00 GMT 2002


On Tue, Mar 19, 2002 at 11:51:36AM -0800, Richard Henderson wrote:
> On Tue, Mar 19, 2002 at 10:18:57AM -0800, Dale Johannesen wrote:
> > I tried this earlier
> > http://gcc.gnu.org/ml/gcc-patches/2002-01/msg02017.html
> > and it was not accepted.
> 
> Hum.  Sorry I missed that.
> 
> > Toon Moene thinks complex divisions should be handled as well
> > (see followups).  I haven't got around to doing that.
> 
> Well, we're certainly not going to be adding that sort
> of code to the branch.  It'd be great if someone could
> do it for mainline though.

I've played with this a little bit.
Doing it the same way as for real RDIV_EXPRs in expand_expr turned out
to generate very bad code, so I changed the optabs complex
divide expanders instead.
I have been benchmarking this on 4 different functions called in tight loops
on a PIII. Only for fn2 did this turn out to be a win (12% faster);
on the other hand, the patch made fn1 and fn4 slightly slower and fn3 14%
slower.

__complex__ double X;
/* Benchmark case: a single division of one complex value by another;
   the quotient is returned.  */
__complex__ double fn1 (__complex__ double x, __complex__ double y)
{
  return x / y;
}
/* Benchmark case: two complex divisions that share the same complex
   divisor y.  The first quotient is stored in the file-scope X, the
   second is returned.  */
__complex__ double fn2 (__complex__ double x, __complex__ double y)
{
  X = (x + 1) / y;
  return x / y;
}
/* Benchmark case: a single division of a complex value by a real
   (non-complex) double; the quotient is returned.  */
__complex__ double fn3 (__complex__ double x, double y)
{
  return x / y;
}
/* Benchmark case: two divisions of complex values by the same real
   double divisor y.  The first quotient is stored in the file-scope X,
   the second is returned.  */
__complex__ double fn4 (__complex__ double x, double y)
{
  X = (x + 1) / y;
  return x / y;
}

--- gcc/optabs.c.jj	Tue Feb 26 10:33:18 2002
+++ gcc/optabs.c	Wed Mar 20 00:26:08 2002
@@ -315,6 +315,22 @@ expand_cmplxdiv_straight (real0, real1, 
 	return 0;
     }
 
+  /* Emit (a+ib)/(c+id) as (a+ib)*(c-id)*(1/(c*c+d*d)).
+     Thus we do exchange two expensive divides for one divide and two less
+     expensive multiplications.  */
+  if (class == MODE_COMPLEX_FLOAT
+      && flag_unsafe_math_optimizations
+      && optimize && !optimize_size)
+    {
+      temp1 = expand_binop (submode, binoptab, CONST1_RTX (submode),
+			    divisor, NULL_RTX, unsignedp, methods);
+      if (temp1 != 0)
+	{
+	  divisor = temp1;
+	  binoptab = this_mul_optab;
+	}
+    }
+
   if (class == MODE_COMPLEX_FLOAT)
     res = expand_binop (submode, binoptab, real_t, divisor,
 			realr, unsignedp, methods);
@@ -365,6 +381,7 @@ expand_cmplxdiv_wide (real0, real1, imag
   optab this_sub_optab = sub_optab;
   optab this_neg_optab = neg_optab;
   optab this_mul_optab = smul_optab;
+  optab orig_binoptab = binoptab;
 
   if (binoptab == sdivv_optab)
     {
@@ -476,6 +493,22 @@ expand_cmplxdiv_wide (real0, real1, imag
 	return 0;
     }
 
+  /* Emit the two divisions as two multiplications by 1/divisor.
+     Thus we do exchange two expensive divides for one divide and two less
+     expensive multiplications.  */
+  if (class == MODE_COMPLEX_FLOAT
+      && flag_unsafe_math_optimizations
+      && optimize && !optimize_size)
+    {
+      temp1 = expand_binop (submode, binoptab, CONST1_RTX (submode),
+			    divisor, NULL_RTX, unsignedp, methods);
+      if (temp1 != 0)
+	{
+	  divisor = temp1;
+	  binoptab = this_mul_optab;
+	}
+    }
+
   if (class == MODE_COMPLEX_FLOAT)
     res = expand_binop (submode, binoptab, real_t, divisor,
 			realr, unsignedp, methods);
@@ -502,6 +535,7 @@ expand_cmplxdiv_wide (real0, real1, imag
   if (res != imagr)
     emit_move_insn (imagr, res);
 
+  binoptab = orig_binoptab;
   lab2 = gen_label_rtx ();
   emit_jump_insn (gen_jump (lab2));
   emit_barrier ();
@@ -576,6 +610,22 @@ expand_cmplxdiv_wide (real0, real1, imag
 	return 0;
     }
 
+  /* Emit the two divisions as two multiplications by 1/divisor.
+     Thus we do exchange two expensive divides for one divide and two less
+     expensive multiplications.  */
+  if (class == MODE_COMPLEX_FLOAT
+      && flag_unsafe_math_optimizations
+      && optimize && !optimize_size)
+    {
+      temp1 = expand_binop (submode, binoptab, CONST1_RTX (submode),
+			    divisor, NULL_RTX, unsignedp, methods);
+      if (temp1 != 0)
+	{
+	  divisor = temp1;
+	  binoptab = this_mul_optab;
+	}
+    }
+
   if (class == MODE_COMPLEX_FLOAT)
     res = expand_binop (submode, binoptab, real_t, divisor,
 			realr, unsignedp, methods);
@@ -1664,6 +1714,24 @@ expand_binop (mode, binoptab, op0, op1, 
 	      /* Don't fetch these from memory more than once.  */
 	      real1 = force_reg (submode, real1);
 
+	      /* Emit (a+ib)/c as (a+ib)*(1/c).  Thus we do exchange two
+		 expensive divides for one divide and two less expensive
+		 multiplications.  */
+	      if (class == MODE_COMPLEX_FLOAT
+		  && flag_unsafe_math_optimizations
+		  && optimize && !optimize_size)
+		{
+		  rtx temp = expand_binop (submode, binoptab,
+					   CONST1_RTX (submode), real1,
+					   NULL_RTX, unsignedp, methods);
+		  if (temp != 0)
+		    {
+		      real1 = temp;
+		      binoptab = (binoptab == sdivv_optab
+				  ? smulv_optab : smul_optab);
+		    }
+		}
+
 	      /* Simply divide the real and imaginary parts by `c' */
 	      if (class == MODE_COMPLEX_FLOAT)
 		res = expand_binop (submode, binoptab, real0, real1,


	Jakub



More information about the Gcc-patches mailing list