This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH: PR target/41653] Add new function to compute thumb1 code size rtx cost
- From: Carrot Wei <carrot at google dot com>
- To: gcc-patches at gcc dot gnu dot org, Richard Earnshaw <rearnsha at arm dot com>, nickc at redhat dot com, Paul Brook <paul at codesourcery dot com>
- Date: Wed, 7 Apr 2010 19:50:20 +0800
- Subject: Re: [PATCH: PR target/41653] Add new function to compute thumb1 code size rtx cost
- References: <7587b291003240304v756c2b1cgbc6754df3b9c1443@mail.gmail.com> <7587b291003270844w344b8770w857a3b39e3ceea8b@mail.gmail.com> <7587b291003300602g3972d0d7u8597cc6bf56b9fcd@mail.gmail.com>
Since mainline is open again, is now a good time to review this patch?
thanks
Guozhi
On Tue, Mar 30, 2010 at 9:02 PM, Carrot Wei <carrot@google.com> wrote:
> Hi, arm maintainers
>
> Could anybody help review this simple patch? Although it is several
> hundred lines of code, it is basically a clone of the existing function
> thumb1_rtx_costs, except for the MULT case.
>
> thanks
> Guozhi
>
> On Sat, Mar 27, 2010 at 11:44 PM, Carrot Wei <carrot@google.com> wrote:
>> Ping?
>>
>> On Wed, Mar 24, 2010 at 6:04 PM, Carrot Wei <carrot@google.com> wrote:
>>> Hi
>>>
>>> Currently the ARM back end has no code-size-specific rtx cost function for
>>> thumb1 instructions. The function thumb1_rtx_costs is called for both code
>>> size and performance optimization, but its implementation is actually tuned
>>> for performance, so it may give bad results when optimizing for code size.
>>> PR target/41653 is one example.
>>>
>>> This patch adds a new function, thumb1_size_rtx_costs, to compute the code
>>> size cost of thumb1 instructions. Its content is copied from thumb1_rtx_costs,
>>> except for the MULT case, which is changed to fix PR 41653. So it should not
>>> significantly change the results when optimizing for size and targeting
>>> thumb1. We should improve it further when we have more related test cases.
>>>
>>> Test:
>>> This patch was applied to trunk GCC and tested on qemu with no new failures.
>>> This patch was also tested with CSiBE, excluding mpeg2 (because the expected
>>> enum size is 32, but the actual enum size is 16). With options "-mthumb -Os",
>>> the total size is:
>>> without this patch: 2655992
>>> with this patch:    2652844
>>>
>>>
>>> ChangeLog:
>>> 2010-03-24  Wei Guozhi  <carrot@google.com>
>>>
>>>         PR target/41653
>>>         * config/arm/arm.c (thumb1_size_rtx_costs): New function.
>>>         (arm_size_rtx_costs): Call the new function when optimized for size.
>>>
>>> thanks
>>> Guozhi
>>>
>>>
>>> Index: arm.c
>>> ===================================================================
>>> --- arm.c ? ? ? (revision 157661)
>>> +++ arm.c ? ? ? (working copy)
>>> @@ -6884,6 +6884,130 @@ arm_rtx_costs_1 (rtx x, enum rtx_code ou
>>> ? ? }
>>> ?}
>>>
>>> +/* Estimates the size cost of thumb1 instructions.
>>> + ? For now most of the code is copied from thumb1_rtx_costs. We need more
>>> + ? fine grain tuning when we have more related test cases. ?*/
>>> +static inline int
>>> +thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
>>> +{
>>> + ?enum machine_mode mode = GET_MODE (x);
>>> +
>>> + ?switch (code)
>>> + ? ?{
>>> + ? ?case ASHIFT:
>>> + ? ?case ASHIFTRT:
>>> + ? ?case LSHIFTRT:
>>> + ? ?case ROTATERT:
>>> + ? ?case PLUS:
>>> + ? ?case MINUS:
>>> + ? ?case COMPARE:
>>> + ? ?case NEG:
>>> + ? ?case NOT:
>>> + ? ? ?return COSTS_N_INSNS (1);
>>> +
>>> + ? ?case MULT:
>>> + ? ? ?if (GET_CODE (XEXP (x, 1)) == CONST_INT)
>>> + ? ? ? ?{
>>> + ? ? ? ? ?/* Thumb1 mul instruction can't operate on const. We must Load it
>>> + ? ? ? ? ? ? into a register first. ?*/
>>> + ? ? ? ? ?int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
>>> + ? ? ? ? ?return COSTS_N_INSNS (1) + const_size;
>>> + ? ? ? ?}
>>> + ? ? ?return COSTS_N_INSNS (1);
>>> +
>>> + ? ?case SET:
>>> + ? ? ?return (COSTS_N_INSNS (1)
>>> + ? ? ? ? ? ? ?+ 4 * ((GET_CODE (SET_SRC (x)) == MEM)
>>> + ? ? ? ? ? ? ? ? ? ? + GET_CODE (SET_DEST (x)) == MEM));
>>> +
>>> + ? ?case CONST_INT:
>>> + ? ? ?if (outer == SET)
>>> + ? ? ? ?{
>>> + ? ? ? ? ?if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
>>> + ? ? ? ? ? ?return 0;
>>> + ? ? ? ? ?if (thumb_shiftable_const (INTVAL (x)))
>>> + ? ? ? ? ? ?return COSTS_N_INSNS (2);
>>> + ? ? ? ? ?return COSTS_N_INSNS (3);
>>> + ? ? ? ?}
>>> + ? ? ?else if ((outer == PLUS || outer == COMPARE)
>>> + ? ? ? ? ? ? ? && INTVAL (x) < 256 && INTVAL (x) > -256)
>>> + ? ? ? ?return 0;
>>> + ? ? ?else if ((outer == IOR || outer == XOR || outer == AND)
>>> + ? ? ? ? ? ? ? && INTVAL (x) < 256 && INTVAL (x) >= -256)
>>> + ? ? ? ?return COSTS_N_INSNS (1);
>>> + ? ? ?else if (outer == AND)
>>> + ? ? ? ?{
>>> + ? ? ? ? ?int i;
>>> + ? ? ? ? ?/* This duplicates the tests in the andsi3 expander. ?*/
>>> + ? ? ? ? ?for (i = 9; i <= 31; i++)
>>> + ? ? ? ? ? ?if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
>>> + ? ? ? ? ? ? ? ?|| (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
>>> + ? ? ? ? ? ? ?return COSTS_N_INSNS (2);
>>> + ? ? ? ?}
>>> + ? ? ?else if (outer == ASHIFT || outer == ASHIFTRT
>>> + ? ? ? ? ? ? ? || outer == LSHIFTRT)
>>> + ? ? ? ?return 0;
>>> + ? ? ?return COSTS_N_INSNS (2);
>>> +
>>> + ? ?case CONST:
>>> + ? ?case CONST_DOUBLE:
>>> + ? ?case LABEL_REF:
>>> + ? ?case SYMBOL_REF:
>>> + ? ? ?return COSTS_N_INSNS (3);
>>> +
>>> + ? ?case UDIV:
>>> + ? ?case UMOD:
>>> + ? ?case DIV:
>>> + ? ?case MOD:
>>> + ? ? ?return 100;
>>> +
>>> + ? ?case TRUNCATE:
>>> + ? ? ?return 99;
>>> +
>>> + ? ?case AND:
>>> + ? ?case XOR:
>>> + ? ?case IOR:
>>> + ? ? ?/* XXX guess. ?*/
>>> + ? ? ?return 8;
>>> +
>>> + ? ?case MEM:
>>> + ? ? ?/* XXX another guess. ?*/
>>> + ? ? ?/* Memory costs quite a lot for the first word, but subsequent words
>>> + ? ? ? ? load at the equivalent of a single insn each. ?*/
>>> + ? ? ?return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
>>> + ? ? ? ? ? ? ?+ ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
>>> + ? ? ? ? ? ? ? ? ? 4 : 0));
>>> +
>>> + ? ?case IF_THEN_ELSE:
>>> + ? ? ?/* XXX a guess. ?*/
>>> + ? ? ?if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
>>> + ? ? ? ?return 14;
>>> + ? ? ?return 2;
>>> +
>>> + ? ?case ZERO_EXTEND:
>>> + ? ? ?/* XXX still guessing. ?*/
>>> + ? ? ?switch (GET_MODE (XEXP (x, 0)))
>>> + ? ? ? ?{
>>> + ? ? ? ? ?case QImode:
>>> + ? ? ? ? ? ?return (1 + (mode == DImode ? 4 : 0)
>>> + ? ? ? ? ? ? ? ? ? ?+ (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
>>> +
>>> + ? ? ? ? ?case HImode:
>>> + ? ? ? ? ? ?return (4 + (mode == DImode ? 4 : 0)
>>> + ? ? ? ? ? ? ? ? ? ?+ (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
>>> +
>>> + ? ? ? ? ?case SImode:
>>> + ? ? ? ? ? ?return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
>>> +
>>> + ? ? ? ? ?default:
>>> + ? ? ? ? ? ?return 99;
>>> + ? ? ? ?}
>>> +
>>> + ? ?default:
>>> + ? ? ?return 99;
>>> + ? ?}
>>> +}
>>> +
>>> ?/* RTX costs when optimizing for size. ?*/
>>> ?static bool
>>> ?arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
>>> @@ -6892,8 +7016,7 @@ arm_size_rtx_costs (rtx x, enum rtx_code
>>> ? enum machine_mode mode = GET_MODE (x);
>>> ? if (TARGET_THUMB1)
>>> ? ? {
>>> - ? ? ?/* XXX TBD. ?For now, use the standard costs. ?*/
>>> - ? ? ?*total = thumb1_rtx_costs (x, code, outer_code);
>>> + ? ? ?*total = thumb1_size_rtx_costs (x, code, outer_code);
>>> ? ? ? return true;
>>> ? ? }
>>>
>>
>