[PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue5488054)
Xinliang David Li
davidxl@google.com
Tue Dec 13 07:49:00 GMT 2011
> +/* Returns true if the vector load/store is unaligned and if
> + unaligned vector load/stores are slow. */
Please document the STMT parameter in the function comment.
>
> +static bool
> +is_slow_vect_unaligned_load_store (gimple stmt)
> +{
> + stmt_vec_info stmt_info;
> + struct data_reference *dr = NULL;
> +
> + /* Are unaligned load/stores slow for this target? */
> + if (!targetm.slow_unaligned_vector_memop
> + || !targetm.slow_unaligned_vector_memop ())
> + return false;
> +
> + /* Harmful only if it is in a hot region of code when profiles are
> + available. */
> + if (profile_status == PROFILE_READ
> + && !maybe_hot_bb_p (gimple_bb (stmt)))
> + return false;
Is this check necessary?
> +
> + stmt_info = vinfo_for_stmt (stmt);
> + if (!stmt_info)
> + return false;
> +
> + /* Check if the access is aligned. */
> + if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
> + {
> + gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
> + if (first_stmt
> + && vinfo_for_stmt (first_stmt))
> + dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
> + }
> + else
> + {
> + dr = STMT_VINFO_DATA_REF (stmt_info);
> + }
Remove {}
> +
> + if (!dr)
> + return false;
> +
> + if (!aligned_access_p (dr))
> + {
> + return true;
> + }
Remove {}
> +
> + return false;
> +}
> +
> /* Make sure the statement is vectorizable. */
>
> bool
> @@ -5065,27 +5112,43 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vect
> if (!bb_vinfo
> && (STMT_VINFO_RELEVANT_P (stmt_info)
> || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
> + {
> ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
> || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
> || vectorizable_conversion (stmt, NULL, NULL, NULL)
> || vectorizable_shift (stmt, NULL, NULL, NULL)
> || vectorizable_operation (stmt, NULL, NULL, NULL)
> || vectorizable_assignment (stmt, NULL, NULL, NULL)
> - || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
> || vectorizable_call (stmt, NULL, NULL)
> - || vectorizable_store (stmt, NULL, NULL, NULL)
> - || vectorizable_reduction (stmt, NULL, NULL, NULL)
> + || vectorizable_reduction (stmt, NULL, NULL, NULL)
> || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
> +
> + if (!ok)
> + {
> + ok = (vectorizable_load (stmt, NULL, NULL, NULL, NULL)
> + || vectorizable_store (stmt, NULL, NULL, NULL));
> +
> + if (ok && is_slow_vect_unaligned_load_store (stmt))
> + ok = false;
> + }
> + }
> else
> {
> if (bb_vinfo)
> - ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
> - || vectorizable_type_demotion (stmt, NULL, NULL, node)
> - || vectorizable_shift (stmt, NULL, NULL, node)
> - || vectorizable_operation (stmt, NULL, NULL, node)
> - || vectorizable_assignment (stmt, NULL, NULL, node)
> - || vectorizable_load (stmt, NULL, NULL, node, NULL)
> - || vectorizable_store (stmt, NULL, NULL, node));
> + {
> + ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
> + || vectorizable_type_demotion (stmt, NULL, NULL, node)
> + || vectorizable_shift (stmt, NULL, NULL, node)
> + || vectorizable_operation (stmt, NULL, NULL, node)
> + || vectorizable_assignment (stmt, NULL, NULL, node));
> + if (!ok)
> + {
> + ok = (vectorizable_load (stmt, NULL, NULL, node, NULL)
> + || vectorizable_store (stmt, NULL, NULL, node));
> + if (ok && is_slow_vect_unaligned_load_store (stmt))
> + ok = false;
> + }
> + }
> }
>
Same question as Ira has asked -- why not do the check inside
vectorizable_load / vectorizable_store instead?
David
> if (!ok)
> Index: config/i386/i386.c
> ===================================================================
> --- config/i386/i386.c (revision 182265)
> +++ config/i386/i386.c (working copy)
> @@ -26464,6 +26464,24 @@ ix86_init_mmx_sse_builtins (void)
> }
> }
>
> +/* Detect whether unaligned vectorizable loads/stores should be
> + considered slow. This is true for core2, where the movdqu insn
> + is slow, ~5x slower than movdqa. */
> +
> +static bool
> +ix86_slow_unaligned_vector_memop (void)
> +{
> + /* This is known to be slow on core2. */
> + if (ix86_tune == PROCESSOR_CORE2_64
> + || ix86_tune == PROCESSOR_CORE2_32)
> + return true;
> +
> + return false;
> +}
> +
> /* Internal method for ix86_init_builtins. */
>
> static void
> @@ -36624,6 +36642,9 @@ ix86_loop_unroll_adjust (unsigned nunroll, struct
> #undef TARGET_BUILD_BUILTIN_VA_LIST
> #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
>
> +#undef TARGET_SLOW_UNALIGNED_VECTOR_MEMOP
> +#define TARGET_SLOW_UNALIGNED_VECTOR_MEMOP ix86_slow_unaligned_vector_memop
> +
> #undef TARGET_ENUM_VA_LIST_P
> #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
>
>
> --
> This patch is available for review at http://codereview.appspot.com/5488054
More information about the Gcc-patches
mailing list