[PATCH: PR target/41653] Add new function to compute thumb1 code size rtx cost
Carrot Wei
carrot@google.com
Tue Mar 30 13:18:00 GMT 2010
Hi, arm maintainers
Could anybody help review this simple patch? Although there are several
hundred lines of code, they are basically a clone of the existing function
thumb1_rtx_costs, except for the MULT case.
thanks
Guozhi
On Sat, Mar 27, 2010 at 11:44 PM, Carrot Wei <carrot@google.com> wrote:
> Ping?
>
> On Wed, Mar 24, 2010 at 6:04 PM, Carrot Wei <carrot@google.com> wrote:
>> Hi
>>
>> Currently the arm back end has no code-size-specific rtx cost function for
>> thumb1 instructions. Function thumb1_rtx_costs is called for both code size
>> and performance optimization, but its implementation is actually tuned for
>> performance, so it may give bad results for code size.
>> PR target/41653 is one example.
>>
>> This patch adds a new function, thumb1_size_rtx_costs, to compute the code size
>> cost for thumb1 instructions. Its content is copied from thumb1_rtx_costs,
>> except for the MULT case, which is changed to fix PR 41653. It therefore should
>> not significantly change the results when optimizing for size and targeting
>> thumb1, but we should improve it when we have more related test cases.
>>
>> Test:
>> This patch was applied to trunk GCC and tested on qemu with no new failures.
>> It was also tested with CSiBE, excluding mpeg2 (because the expected enum size
>> is 32, but the actual enum size is 16). With options "-mthumb -Os", the total
>> size is:
>> without this patch: 2655992
>> with this patch: 2652844
>>
>>
>> ChangeLog:
>> 2010-03-24 Wei Guozhi <carrot@google.com>
>>
>> PR target/41653
>> * config/arm/arm.c (thumb1_size_rtx_costs): New function.
>> (arm_size_rtx_costs): Call the new function when optimized for size.
>>
>> thanks
>> Guozhi
>>
>>
>> Index: arm.c
>> ===================================================================
>> --- arm.c (revision 157661)
>> +++ arm.c (working copy)
>> @@ -6884,6 +6884,130 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
>> }
>> }
>>
>> +/* Estimates the size cost of thumb1 instructions.
>> + For now most of the code is copied from thumb1_rtx_costs. We need more
>> + fine grain tuning when we have more related test cases. */
>> +static inline int
>> +thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
>> +{
>> + enum machine_mode mode = GET_MODE (x);
>> +
>> + switch (code)
>> + {
>> + case ASHIFT:
>> + case ASHIFTRT:
>> + case LSHIFTRT:
>> + case ROTATERT:
>> + case PLUS:
>> + case MINUS:
>> + case COMPARE:
>> + case NEG:
>> + case NOT:
>> + return COSTS_N_INSNS (1);
>> +
>> + case MULT:
>> + if (GET_CODE (XEXP (x, 1)) == CONST_INT)
>> + {
>> + /* Thumb1 mul instruction can't operate on const. We must Load it
>> + into a register first. */
>> + int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
>> + return COSTS_N_INSNS (1) + const_size;
>> + }
>> + return COSTS_N_INSNS (1);
>> +
>> + case SET:
>> + return (COSTS_N_INSNS (1)
>> + + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
>> + + GET_CODE (SET_DEST (x)) == MEM));
>> +
>> + case CONST_INT:
>> + if (outer == SET)
>> + {
>> + if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
>> + return 0;
>> + if (thumb_shiftable_const (INTVAL (x)))
>> + return COSTS_N_INSNS (2);
>> + return COSTS_N_INSNS (3);
>> + }
>> + else if ((outer == PLUS || outer == COMPARE)
>> + && INTVAL (x) < 256 && INTVAL (x) > -256)
>> + return 0;
>> + else if ((outer == IOR || outer == XOR || outer == AND)
>> + && INTVAL (x) < 256 && INTVAL (x) >= -256)
>> + return COSTS_N_INSNS (1);
>> + else if (outer == AND)
>> + {
>> + int i;
>> + /* This duplicates the tests in the andsi3 expander. */
>> + for (i = 9; i <= 31; i++)
>> + if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
>> + || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
>> + return COSTS_N_INSNS (2);
>> + }
>> + else if (outer == ASHIFT || outer == ASHIFTRT
>> + || outer == LSHIFTRT)
>> + return 0;
>> + return COSTS_N_INSNS (2);
>> +
>> + case CONST:
>> + case CONST_DOUBLE:
>> + case LABEL_REF:
>> + case SYMBOL_REF:
>> + return COSTS_N_INSNS (3);
>> +
>> + case UDIV:
>> + case UMOD:
>> + case DIV:
>> + case MOD:
>> + return 100;
>> +
>> + case TRUNCATE:
>> + return 99;
>> +
>> + case AND:
>> + case XOR:
>> + case IOR:
>> + /* XXX guess. */
>> + return 8;
>> +
>> + case MEM:
>> + /* XXX another guess. */
>> + /* Memory costs quite a lot for the first word, but subsequent words
>> + load at the equivalent of a single insn each. */
>> + return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
>> + + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
>> + ? 4 : 0));
>> +
>> + case IF_THEN_ELSE:
>> + /* XXX a guess. */
>> + if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
>> + return 14;
>> + return 2;
>> +
>> + case ZERO_EXTEND:
>> + /* XXX still guessing. */
>> + switch (GET_MODE (XEXP (x, 0)))
>> + {
>> + case QImode:
>> + return (1 + (mode == DImode ? 4 : 0)
>> + + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
>> +
>> + case HImode:
>> + return (4 + (mode == DImode ? 4 : 0)
>> + + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
>> +
>> + case SImode:
>> + return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
>> +
>> + default:
>> + return 99;
>> + }
>> +
>> + default:
>> + return 99;
>> + }
>> +}
>> +
>> /* RTX costs when optimizing for size. */
>> static bool
>> arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
>> @@ -6892,8 +7016,7 @@ arm_size_rtx_costs (rtx x, enum rtx_code
>> enum machine_mode mode = GET_MODE (x);
>> if (TARGET_THUMB1)
>> {
>> - /* XXX TBD. For now, use the standard costs. */
>> - *total = thumb1_rtx_costs (x, code, outer_code);
>> + *total = thumb1_size_rtx_costs (x, code, outer_code);
>> return true;
>> }
>>
>
More information about the Gcc-patches
mailing list