[PATCH] [PR/target 105666] RISC-V: Inhibit FP <--> int register moves via tune param
Vineet Gupta
vineetg@rivosinc.com
Tue May 24 19:38:02 GMT 2022
On 5/24/22 00:59, Kito Cheng wrote:
> Committed, thanks!
Thx for the quick action Kito,
Can this be backported to gcc 12 as well ?
Thx,
-Vineet
>
> On Tue, May 24, 2022 at 3:40 AM Philipp Tomsich
> <philipp.tomsich@vrull.eu> wrote:
>> Good catch!
>>
>> On Mon, 23 May 2022 at 20:12, Vineet Gupta <vineetg@rivosinc.com> wrote:
>>
>>> Under extreme register pressure, compiler can use FP <--> int
>>> moves as a cheap alternate to spilling to memory.
>>> This was seen with SPEC2017 FP benchmark 507.cactu:
>>> ML_BSSN_Advect.cc:ML_BSSN_Advect_Body()
>>>
>>> | fmv.d.x fa5,s9 # PDupwindNthSymm2Xt1, PDupwindNthSymm2Xt1
>>> | .LVL325:
>>> | ld s9,184(sp) # _12469, %sfp
>>> | ...
>>> | .LVL339:
>>> | fmv.x.d s4,fa5 # PDupwindNthSymm2Xt1, PDupwindNthSymm2Xt1
>>> |
>>>
>>> The FMV instructions could be costlier (than stack spill) on certain
>>> micro-architectures, thus this needs to be a per-cpu tunable
>>> (default being to inhibit on all existing RV cpus).
>>>
>>> Testsuite run with new test reports 10 failures without the fix
>>> corresponding to the build variations of pr105666.c
>>>
>>> | === gcc Summary ===
>>> |
>>> | # of expected passes 123318 (+10)
>>> | # of unexpected failures 34 (-10)
>>> | # of unexpected successes 4
>>> | # of expected failures 780
>>> | # of unresolved testcases 4
>>> | # of unsupported tests 2796
>>>
>>> gcc/Changelog:
>>>
>>> * config/riscv/riscv.cc: (struct riscv_tune_param): Add
>>> fmv_cost.
>>> (rocket_tune_info): Add default fmv_cost 8.
>>> (sifive_7_tune_info): Ditto.
>>> (thead_c906_tune_info): Ditto.
>>> (optimize_size_tune_info): Ditto.
>>> (riscv_register_move_cost): Use fmv_cost for int<->fp moves.
>>>
>>> gcc/testsuite/Changelog:
>>>
>>> * gcc.target/riscv/pr105666.c: New test.
>>>
>>> Signed-off-by: Vineet Gupta <vineetg@rivosinc.com>
>>> ---
>>> gcc/config/riscv/riscv.cc | 9 ++++
>>> gcc/testsuite/gcc.target/riscv/pr105666.c | 55 +++++++++++++++++++++++
>>> 2 files changed, 64 insertions(+)
>>> create mode 100644 gcc/testsuite/gcc.target/riscv/pr105666.c
>>>
>>> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
>>> index ee756aab6940..f3ac0d8865f0 100644
>>> --- a/gcc/config/riscv/riscv.cc
>>> +++ b/gcc/config/riscv/riscv.cc
>>> @@ -220,6 +220,7 @@ struct riscv_tune_param
>>> unsigned short issue_rate;
>>> unsigned short branch_cost;
>>> unsigned short memory_cost;
>>> + unsigned short fmv_cost;
>>> bool slow_unaligned_access;
>>> };
>>>
>>> @@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info
>>> = {
>>> 1, /* issue_rate */
>>> 3, /* branch_cost */
>>> 5, /* memory_cost */
>>> + 8, /* fmv_cost */
>>> true, /*
>>> slow_unaligned_access */
>>> };
>>>
>>> @@ -298,6 +300,7 @@ static const struct riscv_tune_param
>>> sifive_7_tune_info = {
>>> 2, /* issue_rate */
>>> 4, /* branch_cost */
>>> 3, /* memory_cost */
>>> + 8, /* fmv_cost */
>>> true, /*
>>> slow_unaligned_access */
>>> };
>>>
>>> @@ -311,6 +314,7 @@ static const struct riscv_tune_param
>>> thead_c906_tune_info = {
>>> 1, /* issue_rate */
>>> 3, /* branch_cost */
>>> 5, /* memory_cost */
>>> + 8, /* fmv_cost */
>>> false, /* slow_unaligned_access */
>>> };
>>>
>>> @@ -324,6 +328,7 @@ static const struct riscv_tune_param
>>> optimize_size_tune_info = {
>>> 1, /* issue_rate */
>>> 1, /* branch_cost */
>>> 2, /* memory_cost */
>>> + 8, /* fmv_cost */
>>> false, /* slow_unaligned_access */
>>> };
>>>
>>> @@ -4737,6 +4742,10 @@ static int
>>> riscv_register_move_cost (machine_mode mode,
>>> reg_class_t from, reg_class_t to)
>>> {
>>> + if ((from == FP_REGS && to == GR_REGS) ||
>>> + (from == GR_REGS && to == FP_REGS))
>>> + return tune_param->fmv_cost;
>>> +
>>> return riscv_secondary_memory_needed (mode, from, to) ? 8 : 2;
>>> }
>>>
>>> diff --git a/gcc/testsuite/gcc.target/riscv/pr105666.c
>>> b/gcc/testsuite/gcc.target/riscv/pr105666.c
>>> new file mode 100644
>>> index 000000000000..904f3bc0763f
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/riscv/pr105666.c
>>> @@ -0,0 +1,55 @@
>>> +/* Shamelessly plugged off
>>> gcc/testsuite/gcc.c-torture/execute/pr28982a.c.
>>> +
>>> + The idea is to induce high register pressure for both int/fp registers
>>> + so that they spill. By default FMV instructions would be used to stash
>>> + int reg to a fp reg (and vice-versa) but that could be costlier than
>>> + spilling to stack. */
>>> +
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-march=rv64g -ffast-math" } */
>>> +
>>> +#define NITER 4
>>> +#define NVARS 20
>>> +#define MULTI(X) \
>>> + X( 0), X( 1), X( 2), X( 3), X( 4), X( 5), X( 6), X( 7), X( 8), X( 9), \
>>> + X(10), X(11), X(12), X(13), X(14), X(15), X(16), X(17), X(18), X(19)
>>> +
>>> +#define DECLAREI(INDEX) inc##INDEX = incs[INDEX]
>>> +#define DECLAREF(INDEX) *ptr##INDEX = ptrs[INDEX], result##INDEX = 5
>>> +#define LOOP(INDEX) result##INDEX += result##INDEX * (*ptr##INDEX),
>>> ptr##INDEX += inc##INDEX
>>> +#define COPYOUT(INDEX) results[INDEX] = result##INDEX
>>> +
>>> +double *ptrs[NVARS];
>>> +double results[NVARS];
>>> +int incs[NVARS];
>>> +
>>> +void __attribute__((noinline))
>>> +foo (int n)
>>> +{
>>> + int MULTI (DECLAREI);
>>> + double MULTI (DECLAREF);
>>> + while (n--)
>>> + MULTI (LOOP);
>>> + MULTI (COPYOUT);
>>> +}
>>> +
>>> +double input[NITER * NVARS];
>>> +
>>> +int
>>> +main (void)
>>> +{
>>> + int i;
>>> +
>>> + for (i = 0; i < NVARS; i++)
>>> + ptrs[i] = input + i, incs[i] = i;
>>> + for (i = 0; i < NITER * NVARS; i++)
>>> + input[i] = i;
>>> + foo (NITER);
>>> + for (i = 0; i < NVARS; i++)
>>> + if (results[i] != i * NITER * (NITER + 1) / 2)
>>> + return 1;
>>> + return 0;
>>> +}
>>> +
>>> +/* { dg-final { scan-assembler-not "\tfmv\\.d\\.x\t" } } */
>>> +/* { dg-final { scan-assembler-not "\tfmv\\.x\\.d\t" } } */
>>> --
>>> 2.32.0
>>>
>>>
More information about the Gcc-patches
mailing list