[PATCH i386][google]With -mtune=core2, avoid generating the slow unaligned vector load/store (issue5488054)
Xinliang David Li
davidxl@google.com
Tue Dec 13 07:49:00 GMT 2011
> +/* Returns true if the vector load/store is unaligned and if
> + unaligned vector load/stores are slow. */
Please document the STMT parameter in the function comment.
>
> +static bool
> +is_slow_vect_unaligned_load_store (gimple stmt)
> +{
> + stmt_vec_info stmt_info;
> + struct data_reference *dr = NULL;
> +
> + /* Are unaligned load/stores slow for this target? */
> + if (!targetm.slow_unaligned_vector_memop
> + || !targetm.slow_unaligned_vector_memop ())
> + return false;
> +
> + /* Harmful only if it is in a hot region of code when profiles are
> + available. */
> + if (profile_status == PROFILE_READ
> + && !maybe_hot_bb_p (gimple_bb (stmt)))
> + return false;
Is this check necessary?
> +
> + stmt_info = vinfo_for_stmt (stmt);
> + if (!stmt_info)
> + return false;
> +
> + /* Check if the access is aligned. */
> + if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
> + {
> + gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
> + if (first_stmt
> + && vinfo_for_stmt (first_stmt))
> + dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
> + }
> + else
> + {
> + dr = STMT_VINFO_DATA_REF (stmt_info);
> + }
Remove {}
> +
> + if (!dr)
> + return false;
> +
> + if (!aligned_access_p (dr))
> + {
> + return true;
> + }
Remove {}
> +
> + return false;
> +}
> +
> /* Make sure the statement is vectorizable. */
>
> bool
> @@ -5065,27 +5112,43 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vect
> if (!bb_vinfo
> && (STMT_VINFO_RELEVANT_P (stmt_info)
> || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
> + {
> ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
> || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
> || vectorizable_conversion (stmt, NULL, NULL, NULL)
> || vectorizable_shift (stmt, NULL, NULL, NULL)
> || vectorizable_operation (stmt, NULL, NULL, NULL)
> || vectorizable_assignment (stmt, NULL, NULL, NULL)
> - || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
> || vectorizable_call (stmt, NULL, NULL)
> - || vectorizable_store (stmt, NULL, NULL, NULL)
> - || vectorizable_reduction (stmt, NULL, NULL, NULL)
> + || vectorizable_reduction (stmt, NULL, NULL, NULL)
> || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
> +
> + if (!ok)
> + {
> + ok = (vectorizable_load (stmt, NULL, NULL, NULL, NULL)
> + || vectorizable_store (stmt, NULL, NULL, NULL));
> +
> + if (ok && is_slow_vect_unaligned_load_store (stmt))
> + ok = false;
> + }
> + }
> else
> {
> if (bb_vinfo)
> - ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
> - || vectorizable_type_demotion (stmt, NULL, NULL, node)
> - || vectorizable_shift (stmt, NULL, NULL, node)
> - || vectorizable_operation (stmt, NULL, NULL, node)
> - || vectorizable_assignment (stmt, NULL, NULL, node)
> - || vectorizable_load (stmt, NULL, NULL, node, NULL)
> - || vectorizable_store (stmt, NULL, NULL, node));
> + {
> + ok = (vectorizable_type_promotion (stmt, NULL, NULL, node)
> + || vectorizable_type_demotion (stmt, NULL, NULL, node)
> + || vectorizable_shift (stmt, NULL, NULL, node)
> + || vectorizable_operation (stmt, NULL, NULL, node)
> + || vectorizable_assignment (stmt, NULL, NULL, node));
> + if (!ok)
> + {
> + ok = (vectorizable_load (stmt, NULL, NULL, node, NULL)
> + || vectorizable_store (stmt, NULL, NULL, node));
> + if (ok && is_slow_vect_unaligned_load_store (stmt))
> + ok = false;
> + }
> + }
> }
>
Same question as Ira has asked -- why not do the check inside
vectorizable_load / vectorizable_store instead?
David
> if (!ok)
> Index: config/i386/i386.c
> ===================================================================
> --- config/i386/i386.c (revision 182265)
> +++ config/i386/i386.c (working copy)
> @@ -26464,6 +26464,24 @@ ix86_init_mmx_sse_builtins (void)
> }
> }
>
> +/* Detect whether unaligned vectorizable loads/stores should be
> + considered slow. This is true for core2, where the movdqu insn
> + is slow, ~5x slower than movdqa. */
> +
> +static bool
> +ix86_slow_unaligned_vector_memop (void)
> +{
> + /* This is known to be slow on core2. */
> + if (ix86_tune == PROCESSOR_CORE2_64
> + || ix86_tune == PROCESSOR_CORE2_32)
> + return true;
> +
> + return false;
> +}
> +
> /* Internal method for ix86_init_builtins. */
>
> static void
> @@ -36624,6 +36642,9 @@ ix86_loop_unroll_adjust (unsigned nunroll, struct
> #undef TARGET_BUILD_BUILTIN_VA_LIST
> #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
>
> +#undef TARGET_SLOW_UNALIGNED_VECTOR_MEMOP
> +#define TARGET_SLOW_UNALIGNED_VECTOR_MEMOP ix86_slow_unaligned_vector_memop
> +
> #undef TARGET_ENUM_VA_LIST_P
> #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
>
>
> --
> This patch is available for review at http://codereview.appspot.com/5488054
More information about the Gcc-patches
mailing list