[PATCH: PR target/41653] Add new function to compute thumb1 code size rtx cost
Carrot Wei
carrot@google.com
Sat Mar 27 17:32:00 GMT 2010
Ping?
On Wed, Mar 24, 2010 at 6:04 PM, Carrot Wei <carrot@google.com> wrote:
> Hi
>
> Currently the ARM back end has no code-size-specific rtx cost function for
> thumb1 instructions. The function thumb1_rtx_costs is called for both code size
> and performance optimization, but its implementation is actually tuned for
> performance, so it may produce bad results when optimizing for code size.
> PR target/41653 is one example.
>
> This patch adds a new function, thumb1_size_rtx_costs, to compute the code size
> cost of thumb1 instructions. Its content is copied from thumb1_rtx_costs,
> except for the MULT case, which is changed to fix PR 41653. So it should not
> significantly change the results when optimizing for size and targeting thumb1,
> but we should improve it as more related test cases become available.
>
> Test:
> This patch was applied to trunk GCC and tested on qemu with no new failures.
> It was also tested with CSiBE, excluding mpeg2 (because the expected enum size
> is 32 but the actual enum size is 16). With options "-mthumb -Os", the total
> size is:
> without this patch: 2655992
> with this patch: 2652844
>
>
> ChangeLog:
> 2010-03-24 Wei Guozhi <carrot@google.com>
>
> PR target/41653
> * config/arm/arm.c (thumb1_size_rtx_costs): New function.
> (arm_size_rtx_costs): Call the new function when optimized for size.
>
> thanks
> Guozhi
>
>
> Index: arm.c
> ===================================================================
> --- arm.c (revision 157661)
> +++ arm.c (working copy)
> @@ -6884,6 +6884,130 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
> }
> }
>
> +/* Estimates the size cost of thumb1 instructions.
> + For now most of the code is copied from thumb1_rtx_costs. We need more
> + fine grain tuning when we have more related test cases. */
> +static inline int
> +thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
> +{
> + enum machine_mode mode = GET_MODE (x);
> +
> + switch (code)
> + {
> + case ASHIFT:
> + case ASHIFTRT:
> + case LSHIFTRT:
> + case ROTATERT:
> + case PLUS:
> + case MINUS:
> + case COMPARE:
> + case NEG:
> + case NOT:
> + return COSTS_N_INSNS (1);
> +
> + case MULT:
> + if (GET_CODE (XEXP (x, 1)) == CONST_INT)
> + {
> + /* Thumb1 mul instruction can't operate on const. We must Load it
> + into a register first. */
> + int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
> + return COSTS_N_INSNS (1) + const_size;
> + }
> + return COSTS_N_INSNS (1);
> +
> + case SET:
> + return (COSTS_N_INSNS (1)
> + + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
> + + GET_CODE (SET_DEST (x)) == MEM));
> +
> + case CONST_INT:
> + if (outer == SET)
> + {
> + if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
> + return 0;
> + if (thumb_shiftable_const (INTVAL (x)))
> + return COSTS_N_INSNS (2);
> + return COSTS_N_INSNS (3);
> + }
> + else if ((outer == PLUS || outer == COMPARE)
> + && INTVAL (x) < 256 && INTVAL (x) > -256)
> + return 0;
> + else if ((outer == IOR || outer == XOR || outer == AND)
> + && INTVAL (x) < 256 && INTVAL (x) >= -256)
> + return COSTS_N_INSNS (1);
> + else if (outer == AND)
> + {
> + int i;
> + /* This duplicates the tests in the andsi3 expander. */
> + for (i = 9; i <= 31; i++)
> + if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
> + || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
> + return COSTS_N_INSNS (2);
> + }
> + else if (outer == ASHIFT || outer == ASHIFTRT
> + || outer == LSHIFTRT)
> + return 0;
> + return COSTS_N_INSNS (2);
> +
> + case CONST:
> + case CONST_DOUBLE:
> + case LABEL_REF:
> + case SYMBOL_REF:
> + return COSTS_N_INSNS (3);
> +
> + case UDIV:
> + case UMOD:
> + case DIV:
> + case MOD:
> + return 100;
> +
> + case TRUNCATE:
> + return 99;
> +
> + case AND:
> + case XOR:
> + case IOR:
> + /* XXX guess. */
> + return 8;
> +
> + case MEM:
> + /* XXX another guess. */
> + /* Memory costs quite a lot for the first word, but subsequent words
> + load at the equivalent of a single insn each. */
> + return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
> + + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
> + ? 4 : 0));
> +
> + case IF_THEN_ELSE:
> + /* XXX a guess. */
> + if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
> + return 14;
> + return 2;
> +
> + case ZERO_EXTEND:
> + /* XXX still guessing. */
> + switch (GET_MODE (XEXP (x, 0)))
> + {
> + case QImode:
> + return (1 + (mode == DImode ? 4 : 0)
> + + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
> +
> + case HImode:
> + return (4 + (mode == DImode ? 4 : 0)
> + + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
> +
> + case SImode:
> + return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
> +
> + default:
> + return 99;
> + }
> +
> + default:
> + return 99;
> + }
> +}
> +
> /* RTX costs when optimizing for size. */
> static bool
> arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
> @@ -6892,8 +7016,7 @@ arm_size_rtx_costs (rtx x, enum rtx_code
> enum machine_mode mode = GET_MODE (x);
> if (TARGET_THUMB1)
> {
> - /* XXX TBD. For now, use the standard costs. */
> - *total = thumb1_rtx_costs (x, code, outer_code);
> + *total = thumb1_size_rtx_costs (x, code, outer_code);
> return true;
> }
>
More information about the Gcc-patches
mailing list