This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[patch] k8 and amdfam10 cost model tuning


123456789012345678901234567890123456789012345678901234567890123456789012
34567890
This is a fix for a problem found with the polyhedron benchmark suite,
where the tuned costs caused a big drop in performance for mdbx. Below
are the numbers for polyhedron (standard runs). I am also investigating
induct, which is showing a performance degradation, but this patch is
specifically aimed at modifying the branch costs.

K8
-------------------------------------------
Flags used: -march=k8 -mtune=k8 -msse3 -O3 -ftree-vectorize 
            -fvect-cost-model -ffast-math -ftree-loop-linear

Note: Original is the original k8 tuning, not the generic costs

Name      Orig (s) New (s)  Improvement (%)
ac        15.71    15.69     0.1
aermod    34.36    34.62    -0.8
air       12.49    12.40     0.7
capacita  81.48    82.25    -0.9
channel    9.45     9.47    -0.2
doduc     44.44    42.80     3.7
fatigue   10.42    10.41     0.1
gas_dyn   11.72    11.53     1.6
induct    56.22    56.17     0.1
linpk     18.95    18.93     0.1
mdbx      24.17    17.98    25.6
nf        27.99    27.78     0.8
protein   70.80    70.73     0.1
rnflow    41.27    40.89     0.9
test_fpu  19.58    19.59    -0.1
tfft       7.20     7.25    -0.7
------------------------------------------


Barcelona
-------------------------
Flags used: -march=amdfam10 -mtune=amdfam10 -msse3 -O3 -ftree-vectorize 
            -fvect-cost-model -ffast-math -ftree-loop-linear

Note: Original is the original amdfam10 tuning, not the generic costs

Name     Orig (s)  New (s)  Improvement (%)
ac        15.04    15.02    0.1
aermod    36.06    36.46   -1.1 
air       11.79    11.55    2.0
capacita  81.33    81.23    0.1
channel    6.59     6.48    1.7
doduc     45.21    45.12    0.2
fatigue   10.78    10.69    0.8
gas_dyn    9.08     9.15   -0.8
induct    75.11    75.12    0.0
linpk     19.80    19.74    0.3
mdbx      24.04    18.18   24.4
nf        27.98    28.01   -0.1
protein   65.70    65.57    0.2
rnflow    41.28    41.47   -0.5
test_fpu  15.31    15.21    0.7
tfft       7.53     7.53    0.0
---------------------------------------------


Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c  (revision 130936)
+++ config/i386/i386.c  (working copy)
@@ -700,7 +700,7 @@ struct processor_costs k8_cost = {
      to limit number of prefetches at all, as their execution also
takes some
      time).  */
   100,                                 /* number of parallel prefetches
*/
-  5,                                   /* Branch cost */
+  3,                                   /* Branch cost */
   COSTS_N_INSNS (4),                   /* cost of FADD and FSUB insns.
*/
   COSTS_N_INSNS (4),                   /* cost of FMUL instruction.  */
   COSTS_N_INSNS (19),                  /* cost of FDIV instruction.  */
@@ -724,8 +724,8 @@ struct processor_costs k8_cost = {
   2,                                    /* vec_align_load_cost.  */
   3,                                    /* vec_unalign_load_cost.  */
   3,                                    /* vec_store_cost.  */
-  6,                                    /* cond_taken_branch_cost.  */
-  1,                                    /* cond_not_taken_branch_cost.
*/
+  3,                                    /* cond_taken_branch_cost.  */
+  2,                                    /* cond_not_taken_branch_cost.
*/
 };
 
 struct processor_costs amdfam10_cost = {
@@ -786,7 +786,7 @@ struct processor_costs amdfam10_cost = {
      to limit number of prefetches at all, as their execution also
takes some
      time).  */
   100,                                 /* number of parallel prefetches
*/
-  5,                                   /* Branch cost */
+  2,                                   /* Branch cost */
   COSTS_N_INSNS (4),                   /* cost of FADD and FSUB insns.
*/
   COSTS_N_INSNS (4),                   /* cost of FMUL instruction.  */
   COSTS_N_INSNS (19),                  /* cost of FDIV instruction.  */
@@ -811,7 +811,7 @@ struct processor_costs amdfam10_cost = {
   2,                                    /* vec_align_load_cost.  */
   2,                                    /* vec_unalign_load_cost.  */
   2,                                    /* vec_store_cost.  */
-  6,                                    /* cond_taken_branch_cost.  */
+  2,                                    /* cond_taken_branch_cost.  */
   1,                                    /* cond_not_taken_branch_cost.
*/
 };



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]