[PATCH, vec-tails 04/10] Add masking cost
Ilya Enkovich
enkovich.gnu@gmail.com
Thu May 19 19:41:00 GMT 2016
Hi,
This patch extends the vectorizer cost model to include masking cost by
adding new cost model locations and a new target hook to compute
masking cost.
Thanks,
Ilya
--
gcc/
2016-05-19 Ilya Enkovich <ilya.enkovich@intel.com>
* config/i386/i386.c (ix86_init_cost): Extend costs array.
(ix86_add_stmt_masking_cost): New.
(ix86_finish_cost): Add masking_prologue_cost and masking_body_cost
args.
(TARGET_VECTORIZE_ADD_STMT_MASKING_COST): New.
* config/i386/i386.h (TARGET_INCREASE_MASK_STORE_COST): New.
* config/i386/x86-tune.def (X86_TUNE_INCREASE_MASK_STORE_COST): New.
* config/rs6000/rs6000.c (_rs6000_cost_data): Extend cost array.
(rs6000_init_cost): Initialize new cost elements.
(rs6000_finish_cost): Add masking_prologue_cost and masking_body_cost.
* config/spu/spu.c (spu_init_cost): Extend costs array.
(spu_finish_cost): Add masking_prologue_cost and masking_body_cost args.
* doc/tm.texi.in (TARGET_VECTORIZE_ADD_STMT_MASKING_COST): New.
* doc/tm.texi: Regenerated.
* target.def (add_stmt_masking_cost): New.
(finish_cost): Add masking_prologue_cost and masking_body_cost args.
* target.h (enum vect_cost_for_stmt): Add vector_mask_load and
vector_mask_store.
(enum vect_cost_model_location): Add vect_masking_prologue
and vect_masking_body.
* targhooks.c (default_builtin_vectorization_cost): Support
vector_mask_load and vector_mask_store.
(default_init_cost): Extend costs array.
(default_add_stmt_masking_cost): New.
(default_finish_cost): Add masking_prologue_cost and masking_body_cost
args.
* targhooks.h (default_add_stmt_masking_cost): New.
* tree-vect-loop.c (vect_estimate_min_profitable_iters): Adjust
finish_cost call.
* tree-vect-slp.c (vect_bb_vectorization_profitable_p): Likewise.
* tree-vectorizer.h (add_stmt_masking_cost): New.
(finish_cost): Add masking_prologue_cost and masking_body_cost args.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 9f62089..6c2c364 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -53932,8 +53932,12 @@ ix86_spill_class (reg_class_t rclass, machine_mode mode)
static void *
ix86_init_cost (struct loop *)
{
- unsigned *cost = XNEWVEC (unsigned, 3);
- cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
+ unsigned *cost = XNEWVEC (unsigned, 5);
+ cost[vect_prologue] = 0;
+ cost[vect_body] = 0;
+ cost[vect_epilogue] = 0;
+ cost[vect_masking_prologue] = 0;
+ cost[vect_masking_body] = 0;
return cost;
}
@@ -53974,16 +53978,56 @@ ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
return retval;
}
+/* Implement targetm.vectorize.add_stmt_masking_cost. */
+
+static unsigned
+ix86_add_stmt_masking_cost (void *data, int count, enum vect_cost_for_stmt kind,
+ struct _stmt_vec_info *stmt_info, int misalign,
+ enum vect_cost_model_location where)
+{
+ bool embedded_masking = false;
+ unsigned *cost = (unsigned *) data;
+ unsigned retval = 0;
+
+ tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
+ if (vectype)
+ {
+ machine_mode mode
+ = ix86_get_mask_mode (TYPE_VECTOR_SUBPARTS (vectype),
+ tree_to_uhwi (TYPE_SIZE_UNIT (vectype)));
+ embedded_masking = !VECTOR_MODE_P (mode);
+ }
+ else
+ embedded_masking = TARGET_AVX512F;
+
+ if (embedded_masking || kind == vector_load)
+ return retval;
+
+ if (kind == vector_store)
+ return TARGET_INCREASE_MASK_STORE_COST ? 10 : 0;
+
+ int stmt_cost = ix86_builtin_vectorization_cost (vector_stmt, vectype, misalign);
+ retval = (unsigned) (count * stmt_cost);
+
+ cost[where] += retval;
+
+ return retval;
+}
+
/* Implement targetm.vectorize.finish_cost. */
static void
ix86_finish_cost (void *data, unsigned *prologue_cost,
- unsigned *body_cost, unsigned *epilogue_cost)
+ unsigned *body_cost, unsigned *epilogue_cost,
+ unsigned *masking_prologue_cost,
+ unsigned *masking_body_cost)
{
unsigned *cost = (unsigned *) data;
*prologue_cost = cost[vect_prologue];
*body_cost = cost[vect_body];
*epilogue_cost = cost[vect_epilogue];
+ *masking_prologue_cost = cost[vect_masking_prologue];
+ *masking_body_cost = cost[vect_masking_body];
}
/* Implement targetm.vectorize.destroy_cost_data. */
@@ -54964,6 +55008,8 @@ ix86_addr_space_zero_address_valid (addr_space_t as)
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
+#undef TARGET_VECTORIZE_ADD_STMT_MASKING_COST
+#define TARGET_VECTORIZE_ADD_STMT_MASKING_COST ix86_add_stmt_masking_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index d0b418b..b42cfa2 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -501,6 +501,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI]
#define TARGET_ONE_IF_CONV_INSN \
ix86_tune_features[X86_TUNE_ONE_IF_CONV_INSN]
+#define TARGET_INCREASE_MASK_STORE_COST \
+ ix86_tune_features[X86_TUNE_INCREASE_MASK_STORE_COST]
/* Feature tests against the various architecture variations. */
enum ix86_arch_indices {
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 31a87b9..3bbcee8 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -527,6 +527,11 @@ DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode",
DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi",
m_SANDYBRIDGE | m_HASWELL | m_GENERIC)
+/* X86_TUNE_INCREASE_MASK_STORE_COST: Increase cost of masked store for
+ some platforms. */
+DEF_TUNE (X86_TUNE_INCREASE_MASK_STORE_COST, "increase_mask_store_cost",
+ m_HASWELL | m_BDVER4 | m_ZNVER1)
+
/*****************************************************************************/
/* This never worked well before. */
/*****************************************************************************/
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 0f70bb9..295deaf 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -5207,7 +5207,7 @@ rs6000_preferred_simd_mode (machine_mode mode)
typedef struct _rs6000_cost_data
{
struct loop *loop_info;
- unsigned cost[3];
+ unsigned cost[5];
} rs6000_cost_data;
/* Test for likely overcommitment of vector hardware resources. If a
@@ -5269,6 +5269,8 @@ rs6000_init_cost (struct loop *loop_info)
data->cost[vect_prologue] = 0;
data->cost[vect_body] = 0;
data->cost[vect_epilogue] = 0;
+ data->cost[vect_masking_prologue] = 0;
+ data->cost[vect_masking_body] = 0;
return data;
}
@@ -5304,7 +5306,9 @@ rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
static void
rs6000_finish_cost (void *data, unsigned *prologue_cost,
- unsigned *body_cost, unsigned *epilogue_cost)
+ unsigned *body_cost, unsigned *epilogue_cost,
+ unsigned *masking_prologue_cost,
+ unsigned *masking_body_cost)
{
rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
@@ -5314,6 +5318,8 @@ rs6000_finish_cost (void *data, unsigned *prologue_cost,
*prologue_cost = cost_data->cost[vect_prologue];
*body_cost = cost_data->cost[vect_body];
*epilogue_cost = cost_data->cost[vect_epilogue];
+ *masking_prologue_cost = cost_data->cost[vect_masking_prologue];
+ *masking_body_cost = cost_data->cost[vect_masking_body];
}
/* Implement targetm.vectorize.destroy_cost_data. */
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
index c3757eb..60d6e6b 100644
--- a/gcc/config/spu/spu.c
+++ b/gcc/config/spu/spu.c
@@ -6630,8 +6630,12 @@ spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
static void *
spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
{
- unsigned *cost = XNEWVEC (unsigned, 3);
- cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
+ unsigned *cost = XNEWVEC (unsigned, 5);
+ cost[vect_prologue] = 0;
+ cost[vect_body] = 0;
+ cost[vect_epilogue] = 0;
+ cost[vect_masking_prologue] = 0;
+ cost[vect_masking_body] = 0;
return cost;
}
@@ -6667,12 +6671,16 @@ spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
static void
spu_finish_cost (void *data, unsigned *prologue_cost,
- unsigned *body_cost, unsigned *epilogue_cost)
+ unsigned *body_cost, unsigned *epilogue_cost,
+ unsigned *masking_prologue_cost,
+ unsigned *masking_body_cost)
{
unsigned *cost = (unsigned *) data;
*prologue_cost = cost[vect_prologue];
*body_cost = cost[vect_body];
*epilogue_cost = cost[vect_epilogue];
+ *masking_prologue_cost = cost[vect_masking_prologue];
+ *masking_body_cost = cost[vect_masking_body];
}
/* Implement targetm.vectorize.destroy_cost_data. */
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 057ac9a..5d23910 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -5720,8 +5720,12 @@ This hook should initialize target-specific data structures in preparation for m
This hook should update the target-specific @var{data} in response to adding @var{count} copies of the given @var{kind} of statement to a loop or basic block. The default adds the builtin vectorizer cost for the copies of the statement to the accumulator specified by @var{where}, (the prologue, body, or epilogue) and returns the amount added. The return value should be viewed as a tentative cost that may later be revised.
@end deftypefn
-@deftypefn {Target Hook} void TARGET_VECTORIZE_FINISH_COST (void *@var{data}, unsigned *@var{prologue_cost}, unsigned *@var{body_cost}, unsigned *@var{epilogue_cost})
-This hook should complete calculations of the cost of vectorizing a loop or basic block based on @var{data}, and return the prologue, body, and epilogue costs as unsigned integers. The default returns the value of the three accumulators.
+@deftypefn {Target Hook} unsigned TARGET_VECTORIZE_ADD_STMT_MASKING_COST (void *@var{data}, int @var{count}, enum vect_cost_for_stmt @var{kind}, struct _stmt_vec_info *@var{stmt_info}, int @var{misalign}, enum vect_cost_model_location @var{where})
+This hook has arguments similar to @code{TARGET_VECTORIZE_ADD_STMT_COST} but adds cost of statement masking.
+@end deftypefn
+
+@deftypefn {Target Hook} void TARGET_VECTORIZE_FINISH_COST (void *@var{data}, unsigned *@var{prologue_cost}, unsigned *@var{body_cost}, unsigned *@var{epilogue_cost}, unsigned *@var{masking_prologue_cost}, unsigned *@var{masking_body_cost})
+This hook should complete calculations of the cost of vectorizing a loop or basic block based on @var{data}, and return the prologue, body, epilogue, masking prologue and masking body costs as unsigned integers. The default returns the value of the five accumulators.
@end deftypefn
@deftypefn {Target Hook} void TARGET_VECTORIZE_DESTROY_COST_DATA (void *@var{data})
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 643f0eb..2e92b47 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4249,6 +4249,8 @@ address; but often a machine-dependent strategy can generate better code.
@hook TARGET_VECTORIZE_ADD_STMT_COST
+@hook TARGET_VECTORIZE_ADD_STMT_MASKING_COST
+
@hook TARGET_VECTORIZE_FINISH_COST
@hook TARGET_VECTORIZE_DESTROY_COST_DATA
diff --git a/gcc/target.def b/gcc/target.def
index 20f2b32..c1c6705 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1898,17 +1898,29 @@ DEFHOOK
enum vect_cost_model_location where),
default_add_stmt_cost)
+/* Similar to add_stmt_cost but records cost of statement masking. */
+DEFHOOK
+(add_stmt_masking_cost,
+ "This hook has arguments similar to @code{TARGET_VECTORIZE_ADD_STMT_COST} "
+ "but adds cost of statement masking.",
+ unsigned,
+ (void *data, int count, enum vect_cost_for_stmt kind,
+ struct _stmt_vec_info *stmt_info, int misalign,
+ enum vect_cost_model_location where),
+ default_add_stmt_masking_cost)
+
/* Target function to calculate the total cost of the current vectorized
loop or block. */
DEFHOOK
(finish_cost,
"This hook should complete calculations of the cost of vectorizing a loop "
- "or basic block based on @var{data}, and return the prologue, body, and "
- "epilogue costs as unsigned integers. The default returns the value of "
- "the three accumulators.",
+ "or basic block based on @var{data}, and return the prologue, body, epilogue, "
+ "masking prologue and masking body costs as unsigned integers. The default "
+ "returns the value of the five accumulators.",
void,
(void *data, unsigned *prologue_cost, unsigned *body_cost,
- unsigned *epilogue_cost),
+ unsigned *epilogue_cost, unsigned *masking_prologue_cost,
+ unsigned *masking_body_cost),
default_finish_cost)
/* Function to delete target-specific cost modeling data. */
diff --git a/gcc/target.h b/gcc/target.h
index 43022bd..17e3803 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -170,7 +170,9 @@ enum vect_cost_for_stmt
cond_branch_taken,
vec_perm,
vec_promote_demote,
- vec_construct
+ vec_construct,
+ vector_mask_load,
+ vector_mask_store
};
/* Separate locations for which the vectorizer cost model should
@@ -178,7 +180,9 @@ enum vect_cost_for_stmt
enum vect_cost_model_location {
vect_prologue = 0,
vect_body = 1,
- vect_epilogue = 2
+ vect_epilogue = 2,
+ vect_masking_prologue = 3,
+ vect_masking_body = 4
};
/* The target structure. This holds all the backend hooks. */
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 6b4601b..072345d 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -579,6 +579,8 @@ default_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
case cond_branch_not_taken:
case vec_perm:
case vec_promote_demote:
+ case vector_mask_load:
+ case vector_mask_store:
return 1;
case unaligned_load:
@@ -1115,8 +1117,12 @@ default_get_mask_mode (unsigned nunits, unsigned vector_size)
void *
default_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
{
- unsigned *cost = XNEWVEC (unsigned, 3);
- cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
+ unsigned *cost = XNEWVEC (unsigned, 5);
+ cost[vect_prologue] = 0;
+ cost[vect_body] = 0;
+ cost[vect_epilogue] = 0;
+ cost[vect_masking_prologue] = 0;
+ cost[vect_masking_body] = 0;
return cost;
}
@@ -1147,16 +1153,48 @@ default_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
return retval;
}
+/* By default, the cost model assumes we use VEC_COND_EXPR to mask the
+ statement result. For memory accesses we need to adjust the mask used
+ in case the access is already masked. */
+
+unsigned
+default_add_stmt_masking_cost (void *data, int count,
+ enum vect_cost_for_stmt kind,
+ struct _stmt_vec_info *stmt_info,
+ int misalign,
+ enum vect_cost_model_location where)
+{
+ unsigned *cost = (unsigned *) data;
+ unsigned retval = 0;
+
+ if (kind == vector_load || kind == vector_store)
+ return retval;
+
+ tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
+ int stmt_cost = targetm.vectorize.builtin_vectorization_cost (vector_stmt,
+ vectype,
+ misalign);
+
+ retval = (unsigned) (count * stmt_cost);
+ cost[where] += retval;
+
+ return retval;
+}
+
/* By default, the cost model just returns the accumulated costs. */
void
default_finish_cost (void *data, unsigned *prologue_cost,
- unsigned *body_cost, unsigned *epilogue_cost)
+ unsigned *body_cost, unsigned *epilogue_cost,
+ unsigned *masking_prologue_cost,
+ unsigned *masking_body_cost)
{
unsigned *cost = (unsigned *) data;
*prologue_cost = cost[vect_prologue];
*body_cost = cost[vect_body];
*epilogue_cost = cost[vect_epilogue];
+ *masking_prologue_cost = cost[vect_masking_prologue];
+ *masking_body_cost = cost[vect_masking_body];
}
/* Free the cost data. */
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 7687c39..5a1c749 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -106,6 +106,10 @@ extern void *default_init_cost (struct loop *);
extern unsigned default_add_stmt_cost (void *, int, enum vect_cost_for_stmt,
struct _stmt_vec_info *, int,
enum vect_cost_model_location);
+extern unsigned default_add_stmt_masking_cost (void *, int,
+ enum vect_cost_for_stmt,
+ struct _stmt_vec_info *, int,
+ enum vect_cost_model_location);
extern void default_finish_cost (void *, unsigned *, unsigned *, unsigned *);
extern void default_destroy_cost_data (void *);
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index a537ef4..e25a0ce 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -3121,6 +3121,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
int vec_outside_cost = 0;
unsigned vec_prologue_cost = 0;
unsigned vec_epilogue_cost = 0;
+ unsigned masking_prologue_cost = 0;
+ unsigned masking_inside_cost = 0;
int scalar_single_iter_cost = 0;
int scalar_outside_cost = 0;
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
@@ -3339,7 +3341,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
/* Complete the target-specific cost calculations. */
finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
- &vec_inside_cost, &vec_epilogue_cost);
+ &vec_inside_cost, &vec_epilogue_cost, &masking_prologue_cost,
+ &masking_inside_cost);
vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index d713848..09480ea 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2454,6 +2454,7 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
int i;
unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
+ unsigned int masking_prologue_cost = 0, masking_inside_cost = 0;
/* Calculate scalar cost. */
FOR_EACH_VEC_ELT (slp_instances, i, instance)
@@ -2472,7 +2473,8 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
/* Complete the target-specific cost calculation. */
finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
- &vec_inside_cost, &vec_epilogue_cost);
+ &vec_inside_cost, &vec_epilogue_cost, &masking_prologue_cost,
+ &masking_inside_cost);
vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index b269752..d3450b6 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -897,13 +897,27 @@ add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
stmt_info, misalign, where);
}
+/* Alias targetm.vectorize.add_stmt_masking_cost. */
+
+static inline unsigned
+add_stmt_masking_cost (void *data, int count, enum vect_cost_for_stmt kind,
+ stmt_vec_info stmt_info, int misalign,
+ enum vect_cost_model_location where)
+{
+ return targetm.vectorize.add_stmt_masking_cost (data, count, kind,
+ stmt_info, misalign, where);
+}
+
/* Alias targetm.vectorize.finish_cost. */
static inline void
finish_cost (void *data, unsigned *prologue_cost,
- unsigned *body_cost, unsigned *epilogue_cost)
+ unsigned *body_cost, unsigned *epilogue_cost,
+ unsigned *masking_prologue_cost,
+ unsigned *masking_body_cost)
{
- targetm.vectorize.finish_cost (data, prologue_cost, body_cost, epilogue_cost);
+ targetm.vectorize.finish_cost (data, prologue_cost, body_cost, epilogue_cost,
+ masking_prologue_cost, masking_body_cost);
}
/* Alias targetm.vectorize.destroy_cost_data. */
More information about the Gcc-patches
mailing list