This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Optimize manual byte swap implementations v3


On Tue, Feb 10, 2009 at 4:17 PM, Andreas Krebbel
<krebbel@linux.vnet.ibm.com> wrote:
> Ok. Here is an updated version.

Really good now.

> +  if (bits < HOST_BITS_PER_WIDEST_INT)
> +    {
> +      bool negative = ((val >> (bits - 1)) & 1) != 0;
> +      if (negative)
> +       val |= (~(unsigned HOST_WIDEST_INT) 0) << (bits - 1) << 1;
> +      else
> +       val &= ~((~(unsigned HOST_WIDEST_INT) 0) << (bits - 1) << 1);
> +    }
> +

tree INTEGER_CSTs should be always canonical (that is, properly
sign or zero-extended to the full two-HOST_WIDE_INT width).  So,
at least in theory (;)) this part is not necessary (likewise in
int_cst_value).  So, can you wrap this inside #ifdef ENABLE_CHECKING
and assert that it doesn't change val?

If you include the feedback from Paolo in the other thread the
patch is ok for stage1.

Thanks,
Richard.

> Tested on s390, s390x and x86_64.
>
> Bye,
>
> -Andreas-
>
> 2009-02-10  Andreas Krebbel  <Andreas.Krebbel@de.ibm.com>
>
>        * passes.c: Add bswap pass.
>        * tree-pass.h: Add pass_optimize_bswap declaration.
>        * tree-ssa-math-opts.c: Include diagnostics.h for print_gimple_stmt.
>        Include rtl.h, expr.h and optabs.h for optab_handler check.
>        (struct symbolic_number, pass_optimize_bswap): New definition.
>        (BSWAP_INITIAL_SYM_NUMBER, BSWAP_COMPARE_SYM_NUMBER): Likewise.
>        (BSWAP_RECURSION_LIMIT): Constant value definition added.
>        (do_shift_rotate, verify_symbolic_number_p): New functions.
>        (find_bswap_1, find_bswap, execute_optimize_bswap): New functions.
>        (gate_optimize_bswap): New function.
>        * tree.c (widest_int_cst_value): New function.
>        * tree.h (widest_int_cst_value): Prototype added.
>
> 2009-02-10  Andreas Krebbel  <Andreas.Krebbel@de.ibm.com>
>
>        * gcc.dg/optimize-bswap-1.c: New testcase.
>
>
> Index: gcc/passes.c
> ===================================================================
> --- gcc/passes.c.orig
> +++ gcc/passes.c
> @@ -638,6 +638,7 @@ init_optimization_passes (void)
>       NEXT_PASS (pass_copy_prop);
>       NEXT_PASS (pass_fold_builtins);
>       NEXT_PASS (pass_cse_sincos);
> +      NEXT_PASS (pass_optimize_bswap);
>       NEXT_PASS (pass_split_crit_edges);
>       NEXT_PASS (pass_pre);
>       NEXT_PASS (pass_sink_code);
> Index: gcc/tree-pass.h
> ===================================================================
> --- gcc/tree-pass.h.orig
> +++ gcc/tree-pass.h
> @@ -363,6 +363,7 @@ extern struct gimple_opt_pass pass_late_
>  extern struct gimple_opt_pass pass_cse_reciprocals;
>  extern struct gimple_opt_pass pass_cse_sincos;
>  extern struct gimple_opt_pass pass_convert_to_rsqrt;
> +extern struct gimple_opt_pass pass_optimize_bswap;
>  extern struct gimple_opt_pass pass_warn_function_return;
>  extern struct gimple_opt_pass pass_warn_function_noreturn;
>  extern struct gimple_opt_pass pass_cselim;
> Index: gcc/tree-ssa-math-opts.c
> ===================================================================
> --- gcc/tree-ssa-math-opts.c.orig
> +++ gcc/tree-ssa-math-opts.c
> @@ -97,7 +97,10 @@ along with GCC; see the file COPYING3.
>  #include "alloc-pool.h"
>  #include "basic-block.h"
>  #include "target.h"
> -
> +#include "diagnostic.h"
> +#include "rtl.h"
> +#include "expr.h"
> +#include "optabs.h"
>
>  /* This structure represents one basic block that either computes a
>    division, or is a common dominator for basic block that compute a
> @@ -879,3 +882,386 @@ struct gimple_opt_pass pass_convert_to_r
>     | TODO_verify_stmts                 /* todo_flags_finish */
>  }
>  };
> +
> +/* A symbolic number is used to detect byte permutation and selection
> +   patterns.  Therefore the field N contains an artificial number
> +   consisting of byte size markers:
> +
> +   0    - byte has the value 0
> +   1..size - byte contains the content of the byte
> +   number indexed with that value minus one  */
> +
> +struct symbolic_number {
> +  unsigned HOST_WIDEST_INT n;
> +  int size;
> +};
> +
> +/* In order to limit the number of instructions considered while
> +   looking for a bswap implementation this number defines the maximum
> +   depth of the search.  */
> +static const int BSWAP_RECURSION_LIMIT = 8;
> +
> +/* The number with which a symbolic number is initialized.  The
> +   number is shifted to the proper type size before using it.  */
> +static const unsigned HOST_WIDEST_INT BSWAP_INITIAL_SYM_NUMBER =
> +  sizeof (HOST_WIDEST_INT) < 8 ? 0 :
> +  (unsigned HOST_WIDEST_INT)0x01020304 << 32 | 0x05060708;
> +
> +/* The number which the find_bswap result should match in order to
> +   have a full byte swap.  The insignificant bytes are masked out
> +   before using it.  */
> +static const unsigned HOST_WIDEST_INT BSWAP_COMPARE_SYM_NUMBER =
> +  sizeof (HOST_WIDEST_INT) < 8 ? 0 :
> +  (unsigned HOST_WIDEST_INT)0x08070605 << 32 | 0x04030201;
> +
> +/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
> +   number N.  Return false if the requested operation is not permitted
> +   on a symbolic number.  */
> +
> +static inline bool
> +do_shift_rotate (enum tree_code code,
> +                struct symbolic_number *n,
> +                int count)
> +{
> +  if (count % 8 != 0)
> +    return false;
> +
> +  /* Zero out the extra bits of N in order to avoid them being shifted
> +     into the significant bits.  */
> +  if (n->size < (int)sizeof (HOST_WIDEST_INT))
> +    n->n &= ((unsigned HOST_WIDEST_INT)1 << (n->size * BITS_PER_UNIT)) - 1;
> +
> +  switch (code)
> +    {
> +    case LSHIFT_EXPR:
> +      n->n <<= count;
> +      break;
> +    case RSHIFT_EXPR:
> +      n->n >>= count;
> +      break;
> +    case LROTATE_EXPR:
> +      n->n = (n->n << count) | (n->n >> ((n->size * BITS_PER_UNIT) - count));
> +      break;
> +    case RROTATE_EXPR:
> +      n->n = (n->n >> count) | (n->n << ((n->size * BITS_PER_UNIT) - count));
> +      break;
> +    default:
> +      return false;
> +    }
> +  return true;
> +}
> +
> +/* Perform sanity checking for the symbolic number N and the gimple
> +   statement STMT.  */
> +
> +static inline bool
> +verify_symbolic_number_p (struct symbolic_number *n, gimple stmt)
> +{
> +  tree lhs_type;
> +
> +  lhs_type = gimple_expr_type (stmt);
> +
> +  if (TREE_CODE (lhs_type) != INTEGER_TYPE)
> +    return false;
> +
> +  if ((int)TREE_INT_CST_LOW (TYPE_SIZE (lhs_type)) != n->size * BITS_PER_UNIT)
> +    return false;
> +
> +  return true;
> +}
> +
> +/* find_bswap_1 invokes itself recursively with N and tries to perform
> +   the operation given by the rhs of STMT on the result.  If the
> +   operation could successfully be executed the function returns the
> +   tree expression of the source operand and NULL otherwise.  */
> +
> +static tree
> +find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
> +{
> +  enum tree_code code;
> +  tree rhs1, rhs2 = NULL;
> +  gimple rhs1_stmt, rhs2_stmt;
> +  tree source_expr1;
> +
> +  if (!limit || !is_gimple_assign (stmt))
> +    return NULL;
> +
> +  rhs1 = gimple_assign_rhs1 (stmt);
> +
> +  if (TREE_CODE (rhs1) != SSA_NAME)
> +    return NULL;
> +
> +  code = gimple_assign_rhs_code (stmt);
> +  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
> +
> +  if (gimple_assign_rhs_class (stmt) == GIMPLE_BINARY_RHS)
> +    rhs2 = gimple_assign_rhs2 (stmt);
> +
> +  /* Handle unary rhs and binary rhs with integer constants as second
> +     operand.  */
> +
> +  if (gimple_assign_rhs_class (stmt) == GIMPLE_UNARY_RHS
> +      || (gimple_assign_rhs_class (stmt) == GIMPLE_BINARY_RHS
> +         && TREE_CODE (rhs2) == INTEGER_CST))
> +    {
> +      if (code != BIT_AND_EXPR
> +         && code != LSHIFT_EXPR
> +         && code != RSHIFT_EXPR
> +         && code != LROTATE_EXPR
> +         && code != RROTATE_EXPR
> +         && code != NOP_EXPR
> +         && code != CONVERT_EXPR)
> +       return NULL;
> +
> +      source_expr1 = find_bswap_1 (rhs1_stmt, n, limit - 1);
> +
> +      /* If find_bswap_1 returned NULL STMT is a leaf node and we have
> +        to initialize the symbolic number.  */
> +      if (!source_expr1)
> +       {
> +         /* Set up the symbolic number N by setting each byte to a
> +            value between 1 and the byte size of rhs1.  The highest
> +            order byte is set to 1 and the lowest order byte to
> +            n.size.  */
> +         n->size = (int)TREE_INT_CST_LOW (TYPE_SIZE (
> +                      TREE_TYPE (rhs1))) / BITS_PER_UNIT;
> +
> +         n->n = BSWAP_INITIAL_SYM_NUMBER >>
> +           (sizeof (HOST_WIDEST_INT) - n->size) * BITS_PER_UNIT;
> +
> +         source_expr1 = rhs1;
> +       }
> +
> +      switch (code)
> +       {
> +       case BIT_AND_EXPR:
> +         {
> +           int i;
> +           unsigned HOST_WIDEST_INT val = widest_int_cst_value (rhs2);
> +           unsigned HOST_WIDEST_INT tmp = val;
> +
> +           /* Only constants masking full bytes are allowed.  */
> +           for (i = 0; i < n->size; i++, tmp >>= BITS_PER_UNIT)
> +             if ((tmp & 0xff) != 0 && (tmp & 0xff) != 0xff)
> +               return false;
> +
> +           n->n &= val;
> +         }
> +         break;
> +       case LSHIFT_EXPR:
> +       case RSHIFT_EXPR:
> +       case LROTATE_EXPR:
> +       case RROTATE_EXPR:
> +         if (!do_shift_rotate (code, n, (int)TREE_INT_CST_LOW (rhs2)))
> +           return NULL;
> +         break;
> +       CASE_CONVERT:
> +         {
> +           int type_size;
> +
> +           type_size = (int)TREE_INT_CST_LOW (TYPE_SIZE_UNIT (
> +                              gimple_expr_type (stmt)));
> +
> +           if (type_size < (int)(sizeof (HOST_WIDEST_INT)))
> +             {
> +               /* If STMT casts to a smaller type mask out the bits not
> +                  belonging to the target type.  */
> +               n->size = type_size;
> +               n->n &= ((unsigned HOST_WIDEST_INT)1 <<
> +                        (type_size * BITS_PER_UNIT)) - 1;
> +             }
> +         }
> +         break;
> +       default:
> +         return NULL;
> +       };
> +      return verify_symbolic_number_p (n, stmt) ? source_expr1 : NULL;
> +    }
> +
> +  /* Handle binary rhs.  */
> +
> +  if (gimple_assign_rhs_class (stmt) == GIMPLE_BINARY_RHS)
> +    {
> +      struct symbolic_number n1, n2;
> +      tree source_expr2;
> +
> +      if (code != BIT_IOR_EXPR)
> +       return NULL;
> +
> +      if (TREE_CODE (rhs2) != SSA_NAME)
> +       return NULL;
> +
> +      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
> +
> +      switch (code)
> +       {
> +       case BIT_IOR_EXPR:
> +         source_expr1 = find_bswap_1 (rhs1_stmt, &n1, limit - 1);
> +
> +         if (!source_expr1)
> +           return NULL;
> +
> +         source_expr2 = find_bswap_1 (rhs2_stmt, &n2, limit - 1);
> +
> +         if (source_expr1 != source_expr2
> +             || n1.size != n2.size)
> +           return NULL;
> +
> +         n->size = n1.size;
> +         n->n = n1.n | n2.n;
> +
> +         if (!verify_symbolic_number_p (n, stmt))
> +           return NULL;
> +
> +         break;
> +       default:
> +         return NULL;
> +       }
> +      return source_expr1;
> +    }
> +  return NULL;
> +}
> +
> +/* Check if STMT completes a bswap implementation consisting of ORs,
> +   SHIFTs and ANDs.  Return the source tree expression on which the
> +   byte swap is performed and NULL if no bswap was found.  */
> +
> +static tree
> +find_bswap (gimple stmt)
> +{
> +  struct symbolic_number n;
> +  tree source_expr;
> +  unsigned HOST_WIDEST_INT cmp = BSWAP_COMPARE_SYM_NUMBER;
> +
> +  source_expr =  find_bswap_1 (stmt, &n, BSWAP_RECURSION_LIMIT);
> +
> +  if (!source_expr)
> +    return NULL;
> +
> +  /* Zero out the extra bits of N and CMP.  */
> +  if (n.size < (int)sizeof (HOST_WIDEST_INT))
> +    {
> +      unsigned HOST_WIDEST_INT mask =
> +       ((unsigned HOST_WIDEST_INT)1 << (n.size * BITS_PER_UNIT)) - 1;
> +
> +      n.n &= mask;
> +      cmp &= mask;
> +    }
> +
> +  /* A complete byte swap should make the symbolic number to start
> +     with the largest digit in the highest order byte.  */
> +  if (cmp != n.n)
> +    return NULL;
> +
> +  return source_expr;
> +}
> +
> +/* Find manual byte swap implementations and turn them into a bswap
> +   builtin invokation.  */
> +
> +static unsigned int
> +execute_optimize_bswap (void)
> +{
> +  basic_block bb;
> +  bool bswap32_p, bswap64_p;
> +
> +  if (BITS_PER_UNIT != 8)
> +    return 0;
> +
> +  if (sizeof (HOST_WIDEST_INT) < 8)
> +    return 0;
> +
> +  bswap32_p = (built_in_decls[BUILT_IN_BSWAP32]
> +              && optab_handler (bswap_optab,SImode)->insn_code !=
> +              CODE_FOR_nothing);
> +  bswap64_p = (built_in_decls[BUILT_IN_BSWAP64]
> +              && optab_handler (bswap_optab,DImode)->insn_code !=
> +              CODE_FOR_nothing);
> +
> +  if (!bswap32_p && !bswap64_p)
> +    return 0;
> +
> +  FOR_EACH_BB (bb)
> +    {
> +      gimple_stmt_iterator gsi;
> +
> +      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> +        {
> +         gimple stmt = gsi_stmt (gsi);
> +         tree bswap_src;
> +         tree fndecl = NULL_TREE;
> +         int type_size;
> +         gimple call;
> +
> +         if (!is_gimple_assign (stmt)
> +             || gimple_assign_rhs_code (stmt) != BIT_IOR_EXPR)
> +           continue;
> +
> +         type_size = (int)TREE_INT_CST_LOW (TYPE_SIZE (
> +                            gimple_expr_type (stmt)));
> +
> +         switch (type_size)
> +           {
> +           case 32:
> +             if (bswap32_p)
> +               fndecl = built_in_decls[BUILT_IN_BSWAP32];
> +             break;
> +           case 64:
> +             if (bswap64_p)
> +               fndecl = built_in_decls[BUILT_IN_BSWAP64];
> +             break;
> +           default:
> +             continue;
> +           }
> +
> +         if (!fndecl)
> +           continue;
> +
> +         bswap_src = find_bswap (stmt);
> +
> +         if (!bswap_src)
> +           continue;
> +
> +         call = gimple_build_call (fndecl, 1, bswap_src);
> +         gimple_call_set_lhs (call, gimple_assign_lhs (stmt));
> +
> +         if (dump_file)
> +           {
> +             fprintf (dump_file, "%d bit bswap implementation found at: ",
> +                      (int)type_size);
> +             print_gimple_stmt (dump_file, stmt, 0, 0);
> +           }
> +
> +         gsi_insert_after (&gsi, call, GSI_SAME_STMT);
> +         gsi_remove (&gsi, true);
> +       }
> +    }
> +
> +  return 0;
> +}
> +
> +static bool
> +gate_optimize_bswap (void)
> +{
> +  return flag_expensive_optimizations && optimize;
> +}
> +
> +struct gimple_opt_pass pass_optimize_bswap =
> +{
> + {
> +  GIMPLE_PASS,
> +  "bswap",                             /* name */
> +  gate_optimize_bswap,                  /* gate */
> +  execute_optimize_bswap,              /* execute */
> +  NULL,                                        /* sub */
> +  NULL,                                        /* next */
> +  0,                                   /* static_pass_number */
> +  0,                                   /* tv_id */
> +  PROP_ssa,                            /* properties_required */
> +  0,                                   /* properties_provided */
> +  0,                                   /* properties_destroyed */
> +  0,                                   /* todo_flags_start */
> +  TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
> +    | TODO_verify_stmts                 /* todo_flags_finish */
> + }
> +};
> Index: gcc/testsuite/gcc.dg/optimize-bswap-1.c
> ===================================================================
> --- /dev/null
> +++ gcc/testsuite/gcc.dg/optimize-bswap-1.c
> @@ -0,0 +1,61 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target stdint_types } */
> +/* { dg-options "-O2 -fdump-tree-bswap" } */
> +
> +#include <stdint.h>
> +
> +#define __const_swab32(x) ((uint32_t)(                               \
> +        (((uint32_t)(x) & (uint32_t)0x000000ffUL) << 24) |            \
> +        (((uint32_t)(x) & (uint32_t)0x0000ff00UL) <<  8) |            \
> +        (((uint32_t)(x) & (uint32_t)0x00ff0000UL) >>  8) |            \
> +        (((uint32_t)(x) & (uint32_t)0xff000000UL) >> 24)))
> +
> +#define __const_swab64(x) ((uint64_t)(                                         \
> +       (((uint64_t)(x) & (uint64_t)0x00000000000000ffULL) << 56) |             \
> +       (((uint64_t)(x) & (uint64_t)0x000000000000ff00ULL) << 40) |             \
> +       (((uint64_t)(x) & (uint64_t)0x0000000000ff0000ULL) << 24) |             \
> +       (((uint64_t)(x) & (uint64_t)0x00000000ff000000ULL) <<  8) |             \
> +       (((uint64_t)(x) & (uint64_t)0x000000ff00000000ULL) >>  8) |             \
> +       (((uint64_t)(x) & (uint64_t)0x0000ff0000000000ULL) >> 24) |             \
> +       (((uint64_t)(x) & (uint64_t)0x00ff000000000000ULL) >> 40) |             \
> +       (((uint64_t)(x) & (uint64_t)0xff00000000000000ULL) >> 56)))
> +
> +/* This byte swap implementation is used by the Linux kernel and the
> +   GNU C library.  */
> +
> +uint32_t
> +swap32_a (uint32_t in)
> +{
> +  return __const_swab32 (in);
> +}
> +
> +uint64_t
> +swap64 (uint64_t in)
> +{
> +  return __const_swab64 (in);
> +}
> +
> +/* The OpenSSH byte swap implementation.  */
> +uint32_t
> +swap32_b (uint32_t in)
> +{
> +  uint32_t a;
> +
> +  a = (in << 16) | (in >> 16);
> +  a = ((a & 0x00ff00ff) << 8) | ((a & 0xff00ff00) >> 8);
> +
> +  return a;
> +}
> +
> +uint32_t
> +swap32_c (uint32_t in)
> +{
> +  return ((in << 24)
> +         | (((uint8_t)(in >> 8) << 16))
> +         | (((uint8_t)(in >> 16) << 8))
> +         | (in >> 24));
> +}
> +
> +/* { dg-final { scan-tree-dump-times "32 bit bswap implementation found at" 3 "bswap" } } */
> +/* { dg-final { scan-tree-dump-times "64 bit bswap implementation found at" 1 "bswap" } } */
> +/* { dg-final { cleanup-tree-dump "bswap" } } */
> Index: gcc/tree.c
> ===================================================================
> --- gcc/tree.c.orig
> +++ gcc/tree.c
> @@ -8308,6 +8308,35 @@ int_cst_value (const_tree x)
>   return val;
>  }
>
> +/* Return value of a constant X and sign-extend it.  */
> +
> +HOST_WIDEST_INT
> +widest_int_cst_value (const_tree x)
> +{
> +  unsigned bits = TYPE_PRECISION (TREE_TYPE (x));
> +  unsigned HOST_WIDEST_INT val = TREE_INT_CST_LOW (x);
> +
> +#if HOST_BITS_PER_WIDEST_INT > HOST_BITS_PER_WIDE_INT
> +  gcc_assert (HOST_BITS_PER_WIDEST_INT >= 2 * HOST_BITS_PER_WIDE_INT);
> +  val |= TREE_INT_CST_HIGH (x) << HOST_BITS_PER_WIDE_INT;
> +#else
> +  /* Make sure the sign-extended value will fit in a HOST_WIDE_INT.  */
> +  gcc_assert (TREE_INT_CST_HIGH (x) == 0
> +             || TREE_INT_CST_HIGH (x) == -1);
> +#endif
> +
> +  if (bits < HOST_BITS_PER_WIDEST_INT)
> +    {
> +      bool negative = ((val >> (bits - 1)) & 1) != 0;
> +      if (negative)
> +       val |= (~(unsigned HOST_WIDEST_INT) 0) << (bits - 1) << 1;
> +      else
> +       val &= ~((~(unsigned HOST_WIDEST_INT) 0) << (bits - 1) << 1);
> +    }
> +
> +  return val;
> +}
> +
>  /* If TYPE is an integral type, return an equivalent type which is
>     unsigned iff UNSIGNEDP is true.  If TYPE is not an integral type,
>     return TYPE itself.  */
> Index: gcc/tree.h
> ===================================================================
> --- gcc/tree.h.orig
> +++ gcc/tree.h
> @@ -4932,6 +4932,7 @@ extern void build_common_builtin_nodes (
>  extern tree build_nonstandard_integer_type (unsigned HOST_WIDE_INT, int);
>  extern tree build_range_type (tree, tree, tree);
>  extern HOST_WIDE_INT int_cst_value (const_tree);
> +extern HOST_WIDEST_INT widest_int_cst_value (const_tree);
>  extern tree build_addr (tree, tree);
>
>  extern bool fields_compatible_p (const_tree, const_tree);
>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]