Describe more Sparc RTX costs

David S. Miller davem@redhat.com
Sun May 5 00:55:00 GMT 2002


This enhances sparc_rtx_costs, as I intended to do yesterday.
Now that the big RTX_COSTS macro has been split out into sparc.c, this
was easy to do :-)

Basically, the latencies as known by the various DFA schedulers were
transformed into RTX costs.  I added a note at the top of this
function that genautomata should eventually do this kind of busy
work for us :-)

2002-05-05  David S. Miller  <davem@redhat.com>

	* config/sparc/sparc.c (sparc_rtx_costs): Describe costs of
	more RTX codes.
	* config/sparc/sparc.h (RTX_COSTS_CASES): List those new codes.

--- config/sparc/sparc.c.~1~	Fri May  3 22:27:55 2002
+++ config/sparc/sparc.c	Sun May  5 00:00:15 2002
@@ -8086,6 +8086,10 @@ sparc_extra_constraint_check (op, c, str
   return reload_ok_mem;
 }
 
+/* ??? This duplicates information provided to the compiler by the
+   ??? scheduler description.  Some day, teach genautomata to output
+   ??? the latencies and then CSE will just use that.  */
+
 int
 sparc_rtx_costs (x, code, outer_code)
      rtx x;
@@ -8093,14 +8097,158 @@ sparc_rtx_costs (x, code, outer_code)
 {
   switch (code)
     {
+    case PLUS: case MINUS: case ABS: case NEG:
+    case FLOAT: case UNSIGNED_FLOAT:
+    case FIX: case UNSIGNED_FIX:
+    case FLOAT_EXTEND: case FLOAT_TRUNCATE:
+      if (FLOAT_MODE_P (GET_MODE (x)))
+	{
+	  switch (sparc_cpu)
+	    {
+	    case PROCESSOR_ULTRASPARC:
+	    case PROCESSOR_ULTRASPARC3:
+	      return COSTS_N_INSNS (4);
+
+	    case PROCESSOR_SUPERSPARC:
+	      return COSTS_N_INSNS (3);
+
+	    case PROCESSOR_CYPRESS:
+	      return COSTS_N_INSNS (5);
+
+	    case PROCESSOR_HYPERSPARC:
+	    case PROCESSOR_SPARCLITE86X:
+	    default:
+	      return COSTS_N_INSNS (1);
+	    }
+	}
+
+      return COSTS_N_INSNS (1);
+
+    case SQRT:
+      switch (sparc_cpu)
+	{
+	case PROCESSOR_ULTRASPARC:
+	  if (GET_MODE (x) == SFmode)
+	    return COSTS_N_INSNS (13);
+	  else
+	    return COSTS_N_INSNS (23);
+
+	case PROCESSOR_ULTRASPARC3:
+	  if (GET_MODE (x) == SFmode)
+	    return COSTS_N_INSNS (20);
+	  else
+	    return COSTS_N_INSNS (29);
+
+	case PROCESSOR_SUPERSPARC:
+	  return COSTS_N_INSNS (12);
+
+	case PROCESSOR_CYPRESS:
+	  return COSTS_N_INSNS (63);
+
+	case PROCESSOR_HYPERSPARC:
+	case PROCESSOR_SPARCLITE86X:
+	  return COSTS_N_INSNS (17);
+
+	default:
+	  return COSTS_N_INSNS (30);
+	}
+
+    case COMPARE:
+      if (FLOAT_MODE_P (GET_MODE (x)))
+	{
+	  switch (sparc_cpu)
+	    {
+	    case PROCESSOR_ULTRASPARC:
+	    case PROCESSOR_ULTRASPARC3:
+	      return COSTS_N_INSNS (1);
+
+	    case PROCESSOR_SUPERSPARC:
+	      return COSTS_N_INSNS (3);
+
+	    case PROCESSOR_CYPRESS:
+	      return COSTS_N_INSNS (5);
+
+	    case PROCESSOR_HYPERSPARC:
+	    case PROCESSOR_SPARCLITE86X:
+	    default:
+	      return COSTS_N_INSNS (1);
+	    }
+	}
+
+      /* ??? Maybe mark integer compares as zero cost on
+	 ??? all UltraSPARC processors because the result
+	 ??? can be bypassed to a branch in the same group.  */
+
+      return COSTS_N_INSNS (1);
+
     case MULT:
+      if (FLOAT_MODE_P (GET_MODE (x)))
+	{
+	  switch (sparc_cpu)
+	    {
+	    case PROCESSOR_ULTRASPARC:
+	    case PROCESSOR_ULTRASPARC3:
+	      return COSTS_N_INSNS (4);
+
+	    case PROCESSOR_SUPERSPARC:
+	      return COSTS_N_INSNS (3);
+
+	    case PROCESSOR_CYPRESS:
+	      return COSTS_N_INSNS (7);
+
+	    case PROCESSOR_HYPERSPARC:
+	    case PROCESSOR_SPARCLITE86X:
+	      return COSTS_N_INSNS (1);
+
+	    default:
+	      return COSTS_N_INSNS (5);
+	    }
+	}
+
+      /* The latency is actually variable for Ultra-I/II.
+	 And if one of the inputs has a known constant
+	 value, we could calculate this precisely.
+
+	 However, for that to be useful we would need to
+	 add some machine description changes which would
+	 make sure small constants ended up in rs1 of the
+	 multiply instruction.  This is because the multiply
+	 latency is determined by the number of clear (or
+	 set if the value is negative) bits starting from
+	 the most significant bit of the first input.
+
+	 The algorithm for computing num_cycles of a multiply
+	 on Ultra-I/II is:
+
+	 	if (rs1 < 0)
+			highest_bit = highest_clear_bit(rs1);
+		else
+			highest_bit = highest_set_bit(rs1);
+		if (highest_bit < 3)
+			highest_bit = 3;
+		num_cycles = 4 + ((highest_bit - 3) / 2);
+
+	 If we did that we would have to also consider register
+	 allocation issues that would result from forcing such
+	 a value into a register.
+
+	 There are other similar tricks we could play if we
+	 knew, for example, that one input was an array index.
+
+	 Since we do not play any such tricks currently the
+	 safest thing to do is report the worst case latency.  */
       if (sparc_cpu == PROCESSOR_ULTRASPARC)
 	return (GET_MODE (x) == DImode ?
 		COSTS_N_INSNS (34) : COSTS_N_INSNS (19));
 
+      /* Multiply latency on Ultra-III, fortunately, is constant.  */
       if (sparc_cpu == PROCESSOR_ULTRASPARC3)
 	return COSTS_N_INSNS (6);
 
+      if (sparc_cpu == PROCESSOR_HYPERSPARC
+	  || sparc_cpu == PROCESSOR_SPARCLITE86X)
+	return COSTS_N_INSNS (17);
+
       return (TARGET_HARD_MUL
 	      ? COSTS_N_INSNS (5)
 	      : COSTS_N_INSNS (25));
@@ -8109,6 +8257,40 @@ sparc_rtx_costs (x, code, outer_code)
     case UDIV:
     case MOD:
     case UMOD:
+      if (FLOAT_MODE_P (GET_MODE (x)))
+	{
+	  switch (sparc_cpu)
+	    {
+	    case PROCESSOR_ULTRASPARC:
+	      if (GET_MODE (x) == SFmode)
+		return COSTS_N_INSNS (13);
+	      else
+		return COSTS_N_INSNS (23);
+
+	    case PROCESSOR_ULTRASPARC3:
+	      if (GET_MODE (x) == SFmode)
+		return COSTS_N_INSNS (17);
+	      else
+		return COSTS_N_INSNS (20);
+
+	    case PROCESSOR_SUPERSPARC:
+	      if (GET_MODE (x) == SFmode)
+		return COSTS_N_INSNS (6);
+	      else
+		return COSTS_N_INSNS (9);
+
+	    case PROCESSOR_HYPERSPARC:
+	    case PROCESSOR_SPARCLITE86X:
+	      if (GET_MODE (x) == SFmode)
+		return COSTS_N_INSNS (8);
+	      else
+		return COSTS_N_INSNS (12);
+
+	    default:
+	      return COSTS_N_INSNS (7);
+	    }
+	}
+
       if (sparc_cpu == PROCESSOR_ULTRASPARC)
 	return (GET_MODE (x) == DImode ?
 		COSTS_N_INSNS (68) : COSTS_N_INSNS (37));
@@ -8117,11 +8299,83 @@ sparc_rtx_costs (x, code, outer_code)
 		COSTS_N_INSNS (71) : COSTS_N_INSNS (40));
       return COSTS_N_INSNS (25);
 
-      /* Make FLOAT and FIX more expensive than CONST_DOUBLE,
-	 so that cse will favor the latter.  */
-    case FLOAT:
-    case FIX:
-      return 19;
+    case IF_THEN_ELSE:
+      /* Conditional moves. */
+      switch (sparc_cpu)
+	{
+	case PROCESSOR_ULTRASPARC:
+	  return COSTS_N_INSNS (2);
+
+	case PROCESSOR_ULTRASPARC3:
+	  if (FLOAT_MODE_P (GET_MODE (x)))
+	    return COSTS_N_INSNS (3);
+	  else
+	    return COSTS_N_INSNS (2);
+
+	default:
+	  return COSTS_N_INSNS (1);
+	}
+
+    case MEM:
+      /* If outer-code is SIGN/ZERO extension we have to subtract
+	 out COSTS_N_INSNS (1) from whatever we return in determining
+	 the cost.  */
+      switch (sparc_cpu)
+	{
+	case PROCESSOR_ULTRASPARC:
+	  if (outer_code == ZERO_EXTEND)
+	    return COSTS_N_INSNS (1);
+	  else
+	    return COSTS_N_INSNS (2);
+
+	case PROCESSOR_ULTRASPARC3:
+	  if (outer_code == ZERO_EXTEND)
+	    {
+	      if (GET_MODE (x) == QImode
+		  || GET_MODE (x) == HImode
+		  || outer_code == SIGN_EXTEND)
+		return COSTS_N_INSNS (2);
+	      else
+		return COSTS_N_INSNS (1);
+	    }
+	  else
+	    {
+	      /* This handles sign extension (3 cycles)
+		 and everything else (2 cycles).  */
+	      return COSTS_N_INSNS (2);
+	    }
+
+	case PROCESSOR_SUPERSPARC:
+	  if (FLOAT_MODE_P (GET_MODE (x))
+	      || outer_code == ZERO_EXTEND
+	      || outer_code == SIGN_EXTEND)
+	    return COSTS_N_INSNS (0);
+	  else
+	    return COSTS_N_INSNS (1);
+
+	case PROCESSOR_TSC701:
+	  if (outer_code == ZERO_EXTEND
+	      || outer_code == SIGN_EXTEND)
+	    return COSTS_N_INSNS (2);
+	  else
+	    return COSTS_N_INSNS (3);
+	  
+	case PROCESSOR_CYPRESS:
+	  if (outer_code == ZERO_EXTEND
+	      || outer_code == SIGN_EXTEND)
+	    return COSTS_N_INSNS (1);
+	  else
+	    return COSTS_N_INSNS (2);
+	  
+	case PROCESSOR_HYPERSPARC:
+	case PROCESSOR_SPARCLITE86X:
+	default:
+	  if (outer_code == ZERO_EXTEND
+	      || outer_code == SIGN_EXTEND)
+	    return COSTS_N_INSNS (0);
+	  else
+	    return COSTS_N_INSNS (1);
+	}
 
     case CONST_INT:
       if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
--- config/sparc/sparc.h.~1~	Fri May  3 22:19:28 2002
+++ config/sparc/sparc.h	Sun May  5 00:00:04 2002
@@ -2675,8 +2675,14 @@ do {                                    
 /* The cases that RTX_COSTS handles.  */
 
 #define RTX_COSTS_CASES	\
+case PLUS: case MINUS: case ABS: case NEG: \
+case FLOAT: case UNSIGNED_FLOAT: \
+case FIX: case UNSIGNED_FIX: \
+case FLOAT_EXTEND: case FLOAT_TRUNCATE: \
+case SQRT: \
+case COMPARE: case IF_THEN_ELSE: \
+case MEM: \
 case MULT: case DIV: case UDIV: case MOD: case UMOD: \
-case FLOAT: case FIX: \
 case CONST_INT: case HIGH: case CONST: \
 case LABEL_REF: case SYMBOL_REF: case CONST_DOUBLE:
 



More information about the Gcc-patches mailing list