[PATCH: PR target/41653] Add new function to compute thumb1 code size rtx cost

Carrot Wei carrot@google.com
Sat Mar 27 17:32:00 GMT 2010


Ping?

On Wed, Mar 24, 2010 at 6:04 PM, Carrot Wei <carrot@google.com> wrote:
> Hi
>
> Currently the ARM back end has no code-size-specific rtx cost function for
> thumb1 instructions. Function thumb1_rtx_costs is called for both code size
> and performance optimization, but its implementation is actually tuned for
> performance, so it may produce bad results when optimizing for code size.
> PR target/41653 is one example.
>
> This patch adds a new function, thumb1_size_rtx_costs, to compute the code
> size cost of thumb1 instructions. Its content is copied from thumb1_rtx_costs,
> except for the MULT case, which is changed to fix PR 41653. So it won't
> significantly change the results when optimizing for size and targeting
> thumb1. But we should improve it when we have more related test cases.
>
> Test:
> This patch was applied to trunk GCC and tested on qemu without new failures.
> This patch was also tested with CSiBE, excluding mpeg2 (its expected enum size
> is 32, but the actual enum size is 16). With options "-mthumb -Os", the total
> size is:
> without this patch: 2655992
> with this patch:    2652844
>
>
> ChangeLog:
> 2010-03-24  Wei Guozhi  <carrot@google.com>
>
>        PR target/41653
>        * config/arm/arm.c (thumb1_size_rtx_costs): New function.
>        (arm_size_rtx_costs): Call the new function when optimized for size.
>
> thanks
> Guozhi
>
>
> Index: arm.c
> ===================================================================
> --- arm.c       (revision 157661)
> +++ arm.c       (working copy)
> @@ -6884,6 +6884,130 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
>     }
>  }
>
> +/* Return the code-size cost of thumb1 rtx X, whose code is CODE and which
> +   appears inside an rtx of code OUTER.  Mostly copied from thumb1_rtx_costs;
> +   needs finer tuning when we have more related test cases.  */
> +static inline int
> +thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
> +{
> +  enum machine_mode mode = GET_MODE (x);
> +
> +  switch (code)
> +    {
> +    case ASHIFT:
> +    case ASHIFTRT:
> +    case LSHIFTRT:
> +    case ROTATERT:
> +    case PLUS:
> +    case MINUS:
> +    case COMPARE:
> +    case NEG:
> +    case NOT:
> +      return COSTS_N_INSNS (1);
> +
> +    case MULT:
> +      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
> +        {
> +          /* Thumb1 mul instruction can't operate on const.  We must load it
> +             into a register first, so add the cost of setting that const.  */
> +          int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
> +          return COSTS_N_INSNS (1) + const_size;
> +        }
> +      return COSTS_N_INSNS (1);
> +
> +    case SET:
> +      return (COSTS_N_INSNS (1)
> +              + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
> +                     + (GET_CODE (SET_DEST (x)) == MEM)));
> +
> +    case CONST_INT:
> +      if (outer == SET)
> +        {
> +          if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
> +            return 0;
> +          if (thumb_shiftable_const (INTVAL (x)))
> +            return COSTS_N_INSNS (2);
> +          return COSTS_N_INSNS (3);
> +        }
> +      else if ((outer == PLUS || outer == COMPARE)
> +               && INTVAL (x) < 256 && INTVAL (x) > -256)
> +        return 0;
> +      else if ((outer == IOR || outer == XOR || outer == AND)
> +               && INTVAL (x) < 256 && INTVAL (x) >= -256)
> +        return COSTS_N_INSNS (1);
> +      else if (outer == AND)
> +        {
> +          int i;
> +          /* This duplicates the tests in the andsi3 expander.  */
> +          for (i = 9; i <= 31; i++)
> +            if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
> +                || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
> +              return COSTS_N_INSNS (2);
> +        }
> +      else if (outer == ASHIFT || outer == ASHIFTRT
> +               || outer == LSHIFTRT)
> +        return 0;
> +      return COSTS_N_INSNS (2);
> +
> +    case CONST:
> +    case CONST_DOUBLE:
> +    case LABEL_REF:
> +    case SYMBOL_REF:
> +      return COSTS_N_INSNS (3);
> +
> +    case UDIV:
> +    case UMOD:
> +    case DIV:
> +    case MOD:
> +      return 100;
> +
> +    case TRUNCATE:
> +      return 99;
> +
> +    case AND:
> +    case XOR:
> +    case IOR:
> +      /* XXX guess.  */
> +      return 8;
> +
> +    case MEM:
> +      /* XXX another guess.  Memory costs quite a lot for the first word, but
> +         subsequent words load at the equivalent of a single insn each.
> +         NOTE(review): X is a MEM here, so the SYMBOL_REF test looks dead.  */
> +      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
> +              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
> +                 ? 4 : 0));
> +
> +    case IF_THEN_ELSE:
> +      /* XXX a guess.  */
> +      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
> +        return 14;
> +      return 2;
> +
> +    case ZERO_EXTEND:
> +      /* XXX still guessing.  */
> +      switch (GET_MODE (XEXP (x, 0)))
> +        {
> +          case QImode:
> +            return (1 + (mode == DImode ? 4 : 0)
> +                    + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
> +
> +          case HImode:
> +            return (4 + (mode == DImode ? 4 : 0)
> +                    + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
> +
> +          case SImode:
> +            return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
> +
> +          default:
> +            return 99;
> +        }
> +
> +    default:
> +      return 99;
> +    }
> +}
> +
>  /* RTX costs when optimizing for size.  */
>  static bool
>  arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
> @@ -6892,8 +7016,7 @@ arm_size_rtx_costs (rtx x, enum rtx_code
>   enum machine_mode mode = GET_MODE (x);
>   if (TARGET_THUMB1)
>     {
> -      /* XXX TBD.  For now, use the standard costs.  */
> -      *total = thumb1_rtx_costs (x, code, outer_code);
> +      *total = thumb1_size_rtx_costs (x, code, outer_code);
>       return true;
>     }
>



More information about the Gcc-patches mailing list