[PATCH/AARCH64] Add rtx_costs routine for vulcan.

Virendra Pathak virendra.pathak@broadcom.com
Fri Jul 8 10:32:00 GMT 2016


Hi James,

Please find the updated patch, which takes care of your comments.


> Did you see those patches, and did you consider whether there would be a
> benefit to doing the same for Vulcan?
In our simulation environment, we did not observe any performance gain
on SPECfp2006. However, we made the change to keep the cost strategy
the same as for Cortex-A57/A53.
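For reference, here is a minimal sketch (not part of the patch) that
contrasts the two costing styles, using the SFmode Div entry from my
earlier posting and from the revised table below; the variable names
are illustrative only:

    /* COSTS_N_INSNS is GCC's cost unit (defined in rtl.h as N * 4).  */
    #define COSTS_N_INSNS(N) ((N) * 4)

    /* Raw-latency style (earlier posting): the cost is the operation's
       full latency.  */
    static const int vulcan_fdiv_sf_raw_cost = COSTS_N_INSNS (16);

    /* Move-relative style (Cortex-A57/A53 and the table in this patch):
       the cost of an FP move is taken as the baseline, so the same
       divide is costed relative to that.  */
    static const int vulcan_fdiv_sf_rel_cost = COSTS_N_INSNS (4);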

Please review and merge to trunk.


gcc/ChangeLog:

Virendra Pathak  <virendra.pathak@broadcom.com>
Julian Brown  <julian@codesourcery.com>

        * config/aarch64/aarch64-cores.def: Update vulcan COSTS.
        * config/aarch64/aarch64-cost-tables.h
        (vulcan_extra_costs): New variable.
        * config/aarch64/aarch64.c
        (vulcan_addrcost_table): Likewise.
        (vulcan_regmove_cost): Likewise.
        (vulcan_vector_cost): Likewise.
        (vulcan_branch_cost): Likewise.
        (vulcan_tunings): Likewise.



with regards,
Virendra Pathak


On Wed, Jun 29, 2016 at 4:23 PM, Virendra Pathak
<virendra.pathak@broadcom.com> wrote:
> Hi James,
>
>> Did you see those patches, and did you consider whether there would be a
>> benefit to doing the same for Vulcan?
> No, I have not studied those patches yet. Currently I am working on
> adding the vulcan scheduler as the next patch.
> Kindly advise on the following: could this patch be merged now
> (assuming you are okay with it), with the vulcan costs updated based
> on your patch later (after the vulcan scheduler)?
>
> Thanks for your time.
>
> with regards,
> Virendra Pathak
>
>
> On Wed, Jun 29, 2016 at 4:11 PM, James Greenhalgh
> <james.greenhalgh@arm.com> wrote:
>> On Thu, Jun 23, 2016 at 02:45:21PM +0530, Virendra Pathak wrote:
>>> Hi gcc-patches group,
>>>
>>> Please find the patch for adding rtx_costs routine for vulcan cpu.
>>>
>>> Tested by building a cross aarch64-linux-gcc, bootstrapping a native
>>> aarch64-unknown-linux-gnu, and running make check (gcc). No new
>>> regression failures are introduced by this patch.
>>>
>>> Kindly review and merge the patch to trunk, if the patch is okay.
>>> Thanks.
>>
>> This is OK, but I have the same question for you as I had for the
>> qdf24xx tuning that Jim proposed, so I won't commit it yet...
>>
>>> gcc/ChangeLog:
>>>
>>> Virendra Pathak  <virendra.pathak@broadcom.com>
>>>
>>>         * config/aarch64/aarch64-cores.def: Update vulcan COSTS.
>>>         * config/aarch64/aarch64-cost-tables.h
>>>         (vulcan_extra_costs): New variable.
>>>         * config/aarch64/aarch64.c
>>>         (vulcan_addrcost_table): Likewise.
>>>         (vulcan_regmove_cost): Likewise.
>>>         (vulcan_vector_cost): Likewise.
>>>         (vulcan_branch_cost): Likewise.
>>>         (vulcan_tunings): Likewise.
>>
>>
>>>
>>> +  {
>>> +    /* FP SFmode */
>>> +    {
>>> +      COSTS_N_INSNS (16),    /* Div.  */
>>> +      COSTS_N_INSNS (6),     /* Mult.  */
>>> +      COSTS_N_INSNS (6),     /* Mult_addsub. */
>>> +      COSTS_N_INSNS (6),     /* Fma.  */
>>> +      COSTS_N_INSNS (6),     /* Addsub.  */
>>> +      COSTS_N_INSNS (5),     /* Fpconst. */
>>> +      COSTS_N_INSNS (5),     /* Neg.  */
>>> +      COSTS_N_INSNS (5),     /* Compare.  */
>>> +      COSTS_N_INSNS (7),     /* Widen.  */
>>> +      COSTS_N_INSNS (7),     /* Narrow.  */
>>> +      COSTS_N_INSNS (7),     /* Toint.  */
>>> +      COSTS_N_INSNS (7),     /* Fromint.  */
>>> +      COSTS_N_INSNS (7)      /* Roundint.  */
>>> +    },
>>> +    /* FP DFmode */
>>> +    {
>>> +      COSTS_N_INSNS (23),    /* Div.  */
>>> +      COSTS_N_INSNS (6),     /* Mult.  */
>>> +      COSTS_N_INSNS (6),     /* Mult_addsub.  */
>>> +      COSTS_N_INSNS (6),     /* Fma.  */
>>> +      COSTS_N_INSNS (6),     /* Addsub.  */
>>> +      COSTS_N_INSNS (5),     /* Fpconst.  */
>>> +      COSTS_N_INSNS (5),     /* Neg.  */
>>> +      COSTS_N_INSNS (5),     /* Compare.  */
>>> +      COSTS_N_INSNS (7),     /* Widen.  */
>>> +      COSTS_N_INSNS (7),     /* Narrow.  */
>>> +      COSTS_N_INSNS (7),     /* Toint.  */
>>> +      COSTS_N_INSNS (7),     /* Fromint.  */
>>> +      COSTS_N_INSNS (7)      /* Roundint.  */
>>> +    }
>>
>> Recently ( https://gcc.gnu.org/ml/gcc-patches/2016-06/msg00251.html ,
>>  https://gcc.gnu.org/ml/gcc-patches/2016-06/msg01418.html ), I changed the
>> Cortex-A57 and Cortex-A53 cost tables to make the cost of a floating-point
>> operation relative to the cost of a floating-point move. This gave some
>> code generation benefits, particularly around conditional execution and
>> generation of constants.
>>
>> Did you see those patches, and did you consider whether there would be a
>> benefit to doing the same for Vulcan?
>>
>> Thanks,
>> James
>>
-------------- next part --------------
From ac65cb8040cb4ee5a0124106bf077afe8cd43105 Mon Sep 17 00:00:00 2001
From: Virendra Pathak <virendra.pathak@broadcom.com>
Date: Tue, 21 Jun 2016 01:44:38 -0700
Subject: [PATCH] AArch64: Add rtx_costs routine for vulcan.

---
 gcc/config/aarch64/aarch64-cores.def     |   2 +-
 gcc/config/aarch64/aarch64-cost-tables.h | 102 +++++++++++++++++++++++++++++++
 gcc/config/aarch64/aarch64.c             |  75 +++++++++++++++++++++++
 3 files changed, 178 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index c4b3118..d9da257 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -52,7 +52,7 @@ AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xge
 
 /* V8.1 Architecture Processors.  */
 
-AARCH64_CORE("vulcan",  vulcan, cortexa57, 8_1A,  AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, cortexa57, "0x42", "0x516")
+AARCH64_CORE("vulcan",  vulcan, cortexa57, 8_1A,  AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, vulcan, "0x42", "0x516")
 
 /* V8 big.LITTLE implementations.  */
 
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
index 3a3f519..54e843c 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -127,6 +127,108 @@ const struct cpu_cost_table thunderx_extra_costs =
   }
 };
 
+const struct cpu_cost_table vulcan_extra_costs =
+{
+  /* ALU */
+  {
+    0,			/* Arith.  */
+    0,			/* Logical.  */
+    0,			/* Shift.  */
+    0,			/* Shift_reg.  */
+    COSTS_N_INSNS (1),	/* Arith_shift.  */
+    COSTS_N_INSNS (1),	/* Arith_shift_reg.  */
+    COSTS_N_INSNS (1),	/* Log_shift.  */
+    COSTS_N_INSNS (1),	/* Log_shift_reg.  */
+    0,			/* Extend.  */
+    COSTS_N_INSNS (1),	/* Extend_arith.  */
+    0,			/* Bfi.  */
+    0,			/* Bfx.  */
+    COSTS_N_INSNS (3),	/* Clz.  */
+    0,			/* Rev.  */
+    0,			/* Non_exec.  */
+    true		/* Non_exec_costs_exec.  */
+  },
+  {
+    /* MULT SImode */
+    {
+      COSTS_N_INSNS (4),	/* Simple.  */
+      COSTS_N_INSNS (4),	/* Flag_setting.  */
+      COSTS_N_INSNS (4),	/* Extend.  */
+      COSTS_N_INSNS (5),	/* Add.  */
+      COSTS_N_INSNS (5),	/* Extend_add.  */
+      COSTS_N_INSNS (18)	/* Idiv.  */
+    },
+    /* MULT DImode */
+    {
+      COSTS_N_INSNS (4),       /* Simple.  */
+      0,                       /* Flag_setting.  */
+      COSTS_N_INSNS (4),       /* Extend.  */
+      COSTS_N_INSNS (5),       /* Add.  */
+      COSTS_N_INSNS (5),       /* Extend_add.  */
+      COSTS_N_INSNS (26)       /* Idiv.  */
+    }
+  },
+  /* LD/ST */
+  {
+    COSTS_N_INSNS (4),	/* Load.  */
+    COSTS_N_INSNS (4),	/* Load_sign_extend.  */
+    COSTS_N_INSNS (5),	/* Ldrd.  */
+    COSTS_N_INSNS (4),	/* Ldm_1st.  */
+    1,			/* Ldm_regs_per_insn_1st.  */
+    1,			/* Ldm_regs_per_insn_subsequent.  */
+    COSTS_N_INSNS (4),	/* Loadf.  */
+    COSTS_N_INSNS (4),	/* Loadd.  */
+    COSTS_N_INSNS (4),	/* Load_unaligned.  */
+    0,			/* Store.  */
+    0,			/* Strd.  */
+    0,			/* Stm_1st.  */
+    1,			/* Stm_regs_per_insn_1st.  */
+    1,			/* Stm_regs_per_insn_subsequent.  */
+    0,			/* Storef.  */
+    0,			/* Stored.  */
+    0,			/* Store_unaligned.  */
+    COSTS_N_INSNS (1),	/* Loadv.  */
+    COSTS_N_INSNS (1)	/* Storev.  */
+  },
+  {
+    /* FP SFmode */
+    {
+      COSTS_N_INSNS (4),	/* Div.  */
+      COSTS_N_INSNS (1),	/* Mult.  */
+      COSTS_N_INSNS (1),	/* Mult_addsub. */
+      COSTS_N_INSNS (1),	/* Fma.  */
+      COSTS_N_INSNS (1),	/* Addsub.  */
+      COSTS_N_INSNS (1),	/* Fpconst. */
+      COSTS_N_INSNS (1),	/* Neg.  */
+      COSTS_N_INSNS (1),	/* Compare.  */
+      COSTS_N_INSNS (2),	/* Widen.  */
+      COSTS_N_INSNS (2),	/* Narrow.  */
+      COSTS_N_INSNS (2),	/* Toint.  */
+      COSTS_N_INSNS (2),	/* Fromint.  */
+      COSTS_N_INSNS (2) 	/* Roundint.  */
+    },
+    /* FP DFmode */
+    {
+      COSTS_N_INSNS (6),	/* Div.  */
+      COSTS_N_INSNS (1),	/* Mult.  */
+      COSTS_N_INSNS (1),	/* Mult_addsub.  */
+      COSTS_N_INSNS (1),	/* Fma.  */
+      COSTS_N_INSNS (1),	/* Addsub.  */
+      COSTS_N_INSNS (1),	/* Fpconst.  */
+      COSTS_N_INSNS (1),	/* Neg.  */
+      COSTS_N_INSNS (1),	/* Compare.  */
+      COSTS_N_INSNS (2),	/* Widen.  */
+      COSTS_N_INSNS (2),	/* Narrow.  */
+      COSTS_N_INSNS (2),	/* Toint.  */
+      COSTS_N_INSNS (2),	/* Fromint.  */
+      COSTS_N_INSNS (2) 	/* Roundint.  */
+    }
+  },
+  /* Vector */
+  {
+    COSTS_N_INSNS (1)	/* Alu.  */
+  }
+};
 
 
 #endif
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 512ef10..5b1595c 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -266,6 +266,22 @@ static const struct cpu_addrcost_table qdf24xx_addrcost_table =
   0 /* imm_offset  */
 };
 
+static const struct cpu_addrcost_table vulcan_addrcost_table =
+{
+    {
+      0, /* hi  */
+      0, /* si  */
+      0, /* di  */
+      2, /* ti  */
+    },
+  0, /* pre_modify  */
+  0, /* post_modify  */
+  2, /* register_offset  */
+  3, /* register_sextend  */
+  3, /* register_zextend  */
+  0, /* imm_offset  */
+};
+
 static const struct cpu_regmove_cost generic_regmove_cost =
 {
   1, /* GP2GP  */
@@ -333,6 +349,15 @@ static const struct cpu_regmove_cost qdf24xx_regmove_cost =
   4 /* FP2FP  */
 };
 
+static const struct cpu_regmove_cost vulcan_regmove_cost =
+{
+  1, /* GP2GP  */
+  /* Avoid the use of int<->fp moves for spilling.  */
+  8, /* GP2FP  */
+  8, /* FP2GP  */
+  4  /* FP2FP  */
+};
+
 /* Generic costs for vector insn classes.  */
 static const struct cpu_vector_cost generic_vector_cost =
 {
@@ -404,6 +429,24 @@ static const struct cpu_vector_cost xgene1_vector_cost =
   1 /* cond_not_taken_branch_cost  */
 };
 
+/* Costs for vector insn classes for Vulcan.  */
+static const struct cpu_vector_cost vulcan_vector_cost =
+{
+  6, /* scalar_stmt_cost  */
+  4, /* scalar_load_cost  */
+  1, /* scalar_store_cost  */
+  6, /* vec_stmt_cost  */
+  3, /* vec_permute_cost  */
+  6, /* vec_to_scalar_cost  */
+  5, /* scalar_to_vec_cost  */
+  8, /* vec_align_load_cost  */
+  8, /* vec_unalign_load_cost  */
+  4, /* vec_unalign_store_cost  */
+  4, /* vec_store_cost  */
+  2, /* cond_taken_branch_cost  */
+  1  /* cond_not_taken_branch_cost  */
+};
+
 /* Generic costs for branch instructions.  */
 static const struct cpu_branch_cost generic_branch_cost =
 {
@@ -418,6 +461,13 @@ static const struct cpu_branch_cost cortexa57_branch_cost =
   3   /* Unpredictable.  */
 };
 
+/* Branch costs for Vulcan.  */
+static const struct cpu_branch_cost vulcan_branch_cost =
+{
+  1,  /* Predictable.  */
+  3   /* Unpredictable.  */
+};
+
 /* Generic approximation modes.  */
 static const cpu_approx_modes generic_approx_modes =
 {
@@ -698,6 +748,31 @@ static const struct tune_params qdf24xx_tunings =
   (AARCH64_EXTRA_TUNE_NONE)		/* tune_flags.  */
 };
 
+static const struct tune_params vulcan_tunings =
+{
+  &vulcan_extra_costs,
+  &vulcan_addrcost_table,
+  &vulcan_regmove_cost,
+  &vulcan_vector_cost,
+  &vulcan_branch_cost,
+  &generic_approx_modes,
+  4, /* memmov_cost.  */
+  4, /* issue_rate.  */
+  AARCH64_FUSE_NOTHING, /* fuseable_ops.  */
+  16,	/* function_align.  */
+  8,	/* jump_align.  */
+  16,	/* loop_align.  */
+  3,	/* int_reassoc_width.  */
+  2,	/* fp_reassoc_width.  */
+  2,	/* vec_reassoc_width.  */
+  2,	/* min_div_recip_mul_sf.  */
+  2,	/* min_div_recip_mul_df.  */
+  0,	/* max_case_values.  */
+  0,	/* cache_line_size.  */
+  tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
+  (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
+};
+
 /* Support for fine-grained override of the tuning structures.  */
 struct aarch64_tuning_override_function
 {
-- 
2.1.0
