This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


[PATCH] Move bswap pass into gimple-ssa-store-merging.c


Hi!

I'll be working on further store-merging improvements over the next two
days and hope to use some of the bswap pass APIs to handle stuff like:
void foo (char *__restrict p, char *__restrict q)
{
  p[0] = q[3];
  p[1] = q[2];
  p[2] = q[1];
  p[3] = q[0];
}
or
  p[4] = data >> 8;
  p[5] = data;
etc.  As a precondition for that, this patch just moves the whole bswap
pass from tree-ssa-math-opts.c into gimple-ssa-store-merging.c, without
too many changes (mostly just putting stuff into an anonymous namespace,
removing unneeded static keywords, and moving the BITS_PER_UNIT == 8 test
from execute to gate).  Bootstrapped/regtested on x86_64-linux and
i686-linux, ok for trunk?  (Wouldn't commit it anyway until further
patches that actually need it are approved.)
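
To make the goal concrete, here is a sketch of the hoped-for end result
for the foo above (not something this patch implements; it assumes a
little-endian target with 32-bit unsigned int and a bswap optab):

/* Illustrative only -- what foo should eventually compile down to.  */
void foo_opt (char *__restrict p, char *__restrict q)
{
  unsigned int v;
  __builtin_memcpy (&v, q, sizeof v);	/* one 32-bit load */
  v = __builtin_bswap32 (v);		/* byte-reverse */
  __builtin_memcpy (p, &v, sizeof v);	/* one 32-bit store */
}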

2017-11-14  Jakub Jelinek  <jakub@redhat.com>

	* tree-ssa-math-opts.c (nop_stats, bswap_stats, struct symbolic_number,
	BITS_PER_MARKER, MARKER_MASK, MARKER_BYTE_UNKNOWN, HEAD_MARKER, CMPNOP,
	CMPXCHG, do_shift_rotate, verify_symbolic_number_p,
	init_symbolic_number, find_bswap_or_nop_load, perform_symbolic_merge,
	find_bswap_or_nop_1, find_bswap_or_nop, pass_data_optimize_bswap,
	class pass_optimize_bswap, bswap_replace,
	pass_optimize_bswap::execute): Moved to ...
	* gimple-ssa-store-merging.c: ... this file.
	Include optabs-tree.h.
	(nop_stats, bswap_stats, do_shift_rotate, verify_symbolic_number_p,
	init_symbolic_number, find_bswap_or_nop_load, perform_symbolic_merge,
	find_bswap_or_nop_1, find_bswap_or_nop, bswap_replace): Put into
	anonymous namespace, remove static keywords.
	(pass_optimize_bswap::gate): Test BITS_PER_UNIT == 8 here...
	(pass_optimize_bswap::execute): ... rather than here.
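
For context, the classic pattern the moved pass already recognizes is a
hand-written endian load, e.g. (illustrative function, little-endian
target assumed):

unsigned int
load_be32 (const unsigned char *q)	/* illustrative name */
{
  return ((unsigned int) q[0] << 24) | ((unsigned int) q[1] << 16)
	 | ((unsigned int) q[2] << 8) | q[3];
}

Walking the defs builds the symbolic number described in the patch below:
each byte of n->n is a marker saying which source byte (1 = lsb) produced
that result byte.  init_symbolic_number starts a 4-byte source at
0x04030201 (CMPNOP masked to four bytes); the shifts and ORs above turn
that into 0x01020304, which equals CMPXCHG shifted down to four bytes, so
find_bswap_or_nop reports a bswap and bswap_replace emits a 32-bit load
plus __builtin_bswap32.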

--- gcc/tree-ssa-math-opts.c.jj	2017-11-06 17:24:15.000000000 +0100
+++ gcc/tree-ssa-math-opts.c	2017-11-14 17:08:28.831697803 +0100
@@ -167,18 +167,6 @@ static struct
 
 static struct
 {
-  /* Number of hand-written 16-bit nop / bswaps found.  */
-  int found_16bit;
-
-  /* Number of hand-written 32-bit nop / bswaps found.  */
-  int found_32bit;
-
-  /* Number of hand-written 64-bit nop / bswaps found.  */
-  int found_64bit;
-} nop_stats, bswap_stats;
-
-static struct
-{
   /* Number of widening multiplication ops inserted.  */
   int widen_mults_inserted;
 
@@ -1934,1070 +1922,6 @@ make_pass_cse_sincos (gcc::context *ctxt
   return new pass_cse_sincos (ctxt);
 }
 
-/* A symbolic number structure is used to detect byte permutation and selection
-   patterns of a source.  To achieve that, its field N contains an artificial
-   number consisting of BITS_PER_MARKER sized markers tracking where each
-   byte comes from in the source:
-
-   0	   - target byte has the value 0
-   FF	   - target byte has an unknown value (e.g. due to sign extension)
-   1..size - marker value is the byte index in the source (0 for lsb).
-
-   To detect permutations on memory sources (arrays and structures), the
-   following is also associated with a symbolic number:
-   - a base address BASE_ADDR and an OFFSET giving the address of the source;
-   - a range which gives the difference between the highest and lowest accessed
-     memory location to make such a symbolic number;
-   - the address SRC of the source element of lowest address as a convenience
-     to easily get BASE_ADDR + offset + lowest bytepos;
-   - number of expressions N_OPS bitwise ored together to represent
-     approximate cost of the computation.
-
-   Note 1: the range is different from size as size reflects the size of the
-   type of the current expression.  For instance, for an array char a[],
-   (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
-   (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
-   time a range of 1.
-
-   Note 2: for non-memory sources, range holds the same value as size.
-
-   Note 3: SRC points to the SSA_NAME in case of non-memory source.  */
-
-struct symbolic_number {
-  uint64_t n;
-  tree type;
-  tree base_addr;
-  tree offset;
-  HOST_WIDE_INT bytepos;
-  tree src;
-  tree alias_set;
-  tree vuse;
-  unsigned HOST_WIDE_INT range;
-  int n_ops;
-};
-
-#define BITS_PER_MARKER 8
-#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
-#define MARKER_BYTE_UNKNOWN MARKER_MASK
-#define HEAD_MARKER(n, size) \
-  ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
-
-/* The number which the find_bswap_or_nop_1 result should match in
-   order to have a nop.  The number is masked according to the size of
-   the symbolic number before using it.  */
-#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
-  (uint64_t)0x08070605 << 32 | 0x04030201)
-
-/* The number which the find_bswap_or_nop_1 result should match in
-   order to have a byte swap.  The number is masked according to the
-   size of the symbolic number before using it.  */
-#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
-  (uint64_t)0x01020304 << 32 | 0x05060708)
-
-/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
-   number N.  Return false if the requested operation is not permitted
-   on a symbolic number.  */
-
-static inline bool
-do_shift_rotate (enum tree_code code,
-		 struct symbolic_number *n,
-		 int count)
-{
-  int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
-  unsigned head_marker;
-
-  if (count % BITS_PER_UNIT != 0)
-    return false;
-  count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
-
-  /* Zero out the extra bits of N in order to avoid them being shifted
-     into the significant bits.  */
-  if (size < 64 / BITS_PER_MARKER)
-    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
-
-  switch (code)
-    {
-    case LSHIFT_EXPR:
-      n->n <<= count;
-      break;
-    case RSHIFT_EXPR:
-      head_marker = HEAD_MARKER (n->n, size);
-      n->n >>= count;
-      /* Arithmetic shift of signed type: result is dependent on the value.  */
-      if (!TYPE_UNSIGNED (n->type) && head_marker)
-	for (i = 0; i < count / BITS_PER_MARKER; i++)
-	  n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
-		  << ((size - 1 - i) * BITS_PER_MARKER);
-      break;
-    case LROTATE_EXPR:
-      n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
-      break;
-    case RROTATE_EXPR:
-      n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
-      break;
-    default:
-      return false;
-    }
-  /* Zero unused bits for size.  */
-  if (size < 64 / BITS_PER_MARKER)
-    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
-  return true;
-}
-
-/* Perform sanity checking for the symbolic number N and the gimple
-   statement STMT.  */
-
-static inline bool
-verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
-{
-  tree lhs_type;
-
-  lhs_type = gimple_expr_type (stmt);
-
-  if (TREE_CODE (lhs_type) != INTEGER_TYPE)
-    return false;
-
-  if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
-    return false;
-
-  return true;
-}
-
-/* Initialize the symbolic number N for the bswap pass from the base element
-   SRC manipulated by the bitwise OR expression.  */
-
-static bool
-init_symbolic_number (struct symbolic_number *n, tree src)
-{
-  int size;
-
-  if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
-    return false;
-
-  n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
-  n->src = src;
-
-  /* Set up the symbolic number N by setting each byte to a value between 1 and
-     the byte size of rhs1.  The highest order byte is set to n->size and the
-     lowest order byte to 1.  */
-  n->type = TREE_TYPE (src);
-  size = TYPE_PRECISION (n->type);
-  if (size % BITS_PER_UNIT != 0)
-    return false;
-  size /= BITS_PER_UNIT;
-  if (size > 64 / BITS_PER_MARKER)
-    return false;
-  n->range = size;
-  n->n = CMPNOP;
-  n->n_ops = 1;
-
-  if (size < 64 / BITS_PER_MARKER)
-    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
-
-  return true;
-}
-
-/* Check if STMT might be a byte swap or a nop from a memory source and return
-   the answer.  If so, REF is that memory source and the base of the memory area
-   accessed and the offset of the access from that base are recorded in N.  */
-
-bool
-find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
-{
-  /* Leaf node is an array or component ref. Memorize its base and
-     offset from base to compare to other such leaf nodes.  */
-  HOST_WIDE_INT bitsize, bitpos;
-  machine_mode mode;
-  int unsignedp, reversep, volatilep;
-  tree offset, base_addr;
-
-  /* Not prepared to handle PDP endian.  */
-  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
-    return false;
-
-  if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
-    return false;
-
-  base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
-				   &unsignedp, &reversep, &volatilep);
-
-  if (TREE_CODE (base_addr) == MEM_REF)
-    {
-      offset_int bit_offset = 0;
-      tree off = TREE_OPERAND (base_addr, 1);
-
-      if (!integer_zerop (off))
-	{
-	  offset_int boff, coff = mem_ref_offset (base_addr);
-	  boff = coff << LOG2_BITS_PER_UNIT;
-	  bit_offset += boff;
-	}
-
-      base_addr = TREE_OPERAND (base_addr, 0);
-
-      /* Avoid returning a negative bitpos as this may wreak havoc later.  */
-      if (wi::neg_p (bit_offset))
-	{
-	  offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false);
-	  offset_int tem = wi::bit_and_not (bit_offset, mask);
-	  /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
-	     Subtract it from BIT_OFFSET and add it (scaled) to OFFSET.  */
-	  bit_offset -= tem;
-	  tem >>= LOG2_BITS_PER_UNIT;
-	  if (offset)
-	    offset = size_binop (PLUS_EXPR, offset,
-				    wide_int_to_tree (sizetype, tem));
-	  else
-	    offset = wide_int_to_tree (sizetype, tem);
-	}
-
-      bitpos += bit_offset.to_shwi ();
-    }
-
-  if (bitpos % BITS_PER_UNIT)
-    return false;
-  if (bitsize % BITS_PER_UNIT)
-    return false;
-  if (reversep)
-    return false;
-
-  if (!init_symbolic_number (n, ref))
-    return false;
-  n->base_addr = base_addr;
-  n->offset = offset;
-  n->bytepos = bitpos / BITS_PER_UNIT;
-  n->alias_set = reference_alias_ptr_type (ref);
-  n->vuse = gimple_vuse (stmt);
-  return true;
-}
-
-/* Compute the symbolic number N representing the result of a bitwise OR on
-   two symbolic numbers N1 and N2 whose source statements are respectively
-   SOURCE_STMT1 and SOURCE_STMT2.  */
-
-static gimple *
-perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
-			gimple *source_stmt2, struct symbolic_number *n2,
-			struct symbolic_number *n)
-{
-  int i, size;
-  uint64_t mask;
-  gimple *source_stmt;
-  struct symbolic_number *n_start;
-
-  tree rhs1 = gimple_assign_rhs1 (source_stmt1);
-  if (TREE_CODE (rhs1) == BIT_FIELD_REF
-      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
-    rhs1 = TREE_OPERAND (rhs1, 0);
-  tree rhs2 = gimple_assign_rhs1 (source_stmt2);
-  if (TREE_CODE (rhs2) == BIT_FIELD_REF
-      && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
-    rhs2 = TREE_OPERAND (rhs2, 0);
-
-  /* Sources are different, cancel bswap if they are not memory locations with
-     the same base (array, structure, ...).  */
-  if (rhs1 != rhs2)
-    {
-      uint64_t inc;
-      HOST_WIDE_INT start_sub, end_sub, end1, end2, end;
-      struct symbolic_number *toinc_n_ptr, *n_end;
-      basic_block bb1, bb2;
-
-      if (!n1->base_addr || !n2->base_addr
-	  || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
-	return NULL;
-
-      if (!n1->offset != !n2->offset
-	  || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
-	return NULL;
-
-      if (n1->bytepos < n2->bytepos)
-	{
-	  n_start = n1;
-	  start_sub = n2->bytepos - n1->bytepos;
-	}
-      else
-	{
-	  n_start = n2;
-	  start_sub = n1->bytepos - n2->bytepos;
-	}
-
-      bb1 = gimple_bb (source_stmt1);
-      bb2 = gimple_bb (source_stmt2);
-      if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
-	source_stmt = source_stmt1;
-      else
-	source_stmt = source_stmt2;
-
-      /* Find the highest address at which a load is performed and
-	 compute related info.  */
-      end1 = n1->bytepos + (n1->range - 1);
-      end2 = n2->bytepos + (n2->range - 1);
-      if (end1 < end2)
-	{
-	  end = end2;
-	  end_sub = end2 - end1;
-	}
-      else
-	{
-	  end = end1;
-	  end_sub = end1 - end2;
-	}
-      n_end = (end2 > end1) ? n2 : n1;
-
-      /* Find symbolic number whose lsb is the most significant.  */
-      if (BYTES_BIG_ENDIAN)
-	toinc_n_ptr = (n_end == n1) ? n2 : n1;
-      else
-	toinc_n_ptr = (n_start == n1) ? n2 : n1;
-
-      n->range = end - n_start->bytepos + 1;
-
-      /* Check that the range of memory covered can be represented by
-	 a symbolic number.  */
-      if (n->range > 64 / BITS_PER_MARKER)
-	return NULL;
-
-      /* Reinterpret byte marks in symbolic number holding the value of
-	 bigger weight according to target endianness.  */
-      inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
-      size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
-      for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
-	{
-	  unsigned marker
-	    = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
-	  if (marker && marker != MARKER_BYTE_UNKNOWN)
-	    toinc_n_ptr->n += inc;
-	}
-    }
-  else
-    {
-      n->range = n1->range;
-      n_start = n1;
-      source_stmt = source_stmt1;
-    }
-
-  if (!n1->alias_set
-      || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
-    n->alias_set = n1->alias_set;
-  else
-    n->alias_set = ptr_type_node;
-  n->vuse = n_start->vuse;
-  n->base_addr = n_start->base_addr;
-  n->offset = n_start->offset;
-  n->src = n_start->src;
-  n->bytepos = n_start->bytepos;
-  n->type = n_start->type;
-  size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
-
-  for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
-    {
-      uint64_t masked1, masked2;
-
-      masked1 = n1->n & mask;
-      masked2 = n2->n & mask;
-      if (masked1 && masked2 && masked1 != masked2)
-	return NULL;
-    }
-  n->n = n1->n | n2->n;
-  n->n_ops = n1->n_ops + n2->n_ops;
-
-  return source_stmt;
-}
-
-/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
-   the operation given by the rhs of STMT on the result.  If the operation
-   could successfully be executed, the function returns a gimple stmt whose
-   rhs's first tree is the expression of the source operand, and NULL
-   otherwise.  */
-
-static gimple *
-find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
-{
-  enum tree_code code;
-  tree rhs1, rhs2 = NULL;
-  gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
-  enum gimple_rhs_class rhs_class;
-
-  if (!limit || !is_gimple_assign (stmt))
-    return NULL;
-
-  rhs1 = gimple_assign_rhs1 (stmt);
-
-  if (find_bswap_or_nop_load (stmt, rhs1, n))
-    return stmt;
-
-  /* Handle BIT_FIELD_REF.  */
-  if (TREE_CODE (rhs1) == BIT_FIELD_REF
-      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
-    {
-      unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
-      unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
-      if (bitpos % BITS_PER_UNIT == 0
-	  && bitsize % BITS_PER_UNIT == 0
-	  && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
-	{
-	  /* Handle big-endian bit numbering in BIT_FIELD_REF.  */
-	  if (BYTES_BIG_ENDIAN)
-	    bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
-
-	  /* Shift.  */
-	  if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
-	    return NULL;
-
-	  /* Mask.  */
-	  uint64_t mask = 0;
-	  uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
-	  for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
-	       i++, tmp <<= BITS_PER_UNIT)
-	    mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
-	  n->n &= mask;
-
-	  /* Convert.  */
-	  n->type = TREE_TYPE (rhs1);
-	  if (!n->base_addr)
-	    n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
-
-	  return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
-	}
-
-      return NULL;
-    }
-
-  if (TREE_CODE (rhs1) != SSA_NAME)
-    return NULL;
-
-  code = gimple_assign_rhs_code (stmt);
-  rhs_class = gimple_assign_rhs_class (stmt);
-  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
-
-  if (rhs_class == GIMPLE_BINARY_RHS)
-    rhs2 = gimple_assign_rhs2 (stmt);
-
-  /* Handle unary rhs and binary rhs with integer constants as second
-     operand.  */
-
-  if (rhs_class == GIMPLE_UNARY_RHS
-      || (rhs_class == GIMPLE_BINARY_RHS
-	  && TREE_CODE (rhs2) == INTEGER_CST))
-    {
-      if (code != BIT_AND_EXPR
-	  && code != LSHIFT_EXPR
-	  && code != RSHIFT_EXPR
-	  && code != LROTATE_EXPR
-	  && code != RROTATE_EXPR
-	  && !CONVERT_EXPR_CODE_P (code))
-	return NULL;
-
-      source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
-
-      /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
-	 we have to initialize the symbolic number.  */
-      if (!source_stmt1)
-	{
-	  if (gimple_assign_load_p (stmt)
-	      || !init_symbolic_number (n, rhs1))
-	    return NULL;
-	  source_stmt1 = stmt;
-	}
-
-      switch (code)
-	{
-	case BIT_AND_EXPR:
-	  {
-	    int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
-	    uint64_t val = int_cst_value (rhs2), mask = 0;
-	    uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
-
-	    /* Only constants masking full bytes are allowed.  */
-	    for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
-	      if ((val & tmp) != 0 && (val & tmp) != tmp)
-		return NULL;
-	      else if (val & tmp)
-		mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
-
-	    n->n &= mask;
-	  }
-	  break;
-	case LSHIFT_EXPR:
-	case RSHIFT_EXPR:
-	case LROTATE_EXPR:
-	case RROTATE_EXPR:
-	  if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
-	    return NULL;
-	  break;
-	CASE_CONVERT:
-	  {
-	    int i, type_size, old_type_size;
-	    tree type;
-
-	    type = gimple_expr_type (stmt);
-	    type_size = TYPE_PRECISION (type);
-	    if (type_size % BITS_PER_UNIT != 0)
-	      return NULL;
-	    type_size /= BITS_PER_UNIT;
-	    if (type_size > 64 / BITS_PER_MARKER)
-	      return NULL;
-
-	    /* Sign extension: result is dependent on the value.  */
-	    old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
-	    if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
-		&& HEAD_MARKER (n->n, old_type_size))
-	      for (i = 0; i < type_size - old_type_size; i++)
-		n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
-			<< ((type_size - 1 - i) * BITS_PER_MARKER);
-
-	    if (type_size < 64 / BITS_PER_MARKER)
-	      {
-		/* If STMT casts to a smaller type mask out the bits not
-		   belonging to the target type.  */
-		n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
-	      }
-	    n->type = type;
-	    if (!n->base_addr)
-	      n->range = type_size;
-	  }
-	  break;
-	default:
-	  return NULL;
-	};
-      return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
-    }
-
-  /* Handle binary rhs.  */
-
-  if (rhs_class == GIMPLE_BINARY_RHS)
-    {
-      struct symbolic_number n1, n2;
-      gimple *source_stmt, *source_stmt2;
-
-      if (code != BIT_IOR_EXPR)
-	return NULL;
-
-      if (TREE_CODE (rhs2) != SSA_NAME)
-	return NULL;
-
-      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
-
-      switch (code)
-	{
-	case BIT_IOR_EXPR:
-	  source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
-
-	  if (!source_stmt1)
-	    return NULL;
-
-	  source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
-
-	  if (!source_stmt2)
-	    return NULL;
-
-	  if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
-	    return NULL;
-
-	  if (!n1.vuse != !n2.vuse
-	      || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
-	    return NULL;
-
-	  source_stmt
-	    = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
-
-	  if (!source_stmt)
-	    return NULL;
-
-	  if (!verify_symbolic_number_p (n, stmt))
-	    return NULL;
-
-	  break;
-	default:
-	  return NULL;
-	}
-      return source_stmt;
-    }
-  return NULL;
-}
-
-/* Check if STMT completes a bswap implementation or a read in a given
-   endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
-   accordingly.  It also sets N to represent the kind of operations
-   performed: size of the resulting expression and whether it works on
-   a memory source, and if so its alias set and vuse.  Finally, the
-   function returns a stmt whose rhs's first tree is the source
-   expression.  */
-
-static gimple *
-find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
-{
-  unsigned rsize;
-  uint64_t tmpn, mask;
-/* The number which the find_bswap_or_nop_1 result should match in order
-   to have a full byte swap.  The number is shifted to the right
-   according to the size of the symbolic number before using it.  */
-  uint64_t cmpxchg = CMPXCHG;
-  uint64_t cmpnop = CMPNOP;
-
-  gimple *ins_stmt;
-  int limit;
-
-  /* The last parameter determines the search depth limit.  It usually
-     correlates directly to the number n of bytes to be touched.  We
-     increase that number by log2(n) + 1 here in order to also
-     cover signed -> unsigned conversions of the src operand as can be seen
-     in libgcc, and for initial shift/and operation of the src operand.  */
-  limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
-  limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
-  ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
-
-  if (!ins_stmt)
-    return NULL;
-
-  /* Find real size of result (highest non-zero byte).  */
-  if (n->base_addr)
-    for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
-  else
-    rsize = n->range;
-
-  /* Zero out the bits corresponding to untouched bytes in original gimple
-     expression.  */
-  if (n->range < (int) sizeof (int64_t))
-    {
-      mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
-      cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
-      cmpnop &= mask;
-    }
-
-  /* Zero out the bits corresponding to unused bytes in the result of the
-     gimple expression.  */
-  if (rsize < n->range)
-    {
-      if (BYTES_BIG_ENDIAN)
-	{
-	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
-	  cmpxchg &= mask;
-	  cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
-	}
-      else
-	{
-	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
-	  cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
-	  cmpnop &= mask;
-	}
-      n->range = rsize;
-    }
-
-  /* A complete byte swap should make the symbolic number start with the
-     largest digit in the highest order byte.  An unchanged symbolic number
-     indicates a read with the same endianness as the target architecture.  */
-  if (n->n == cmpnop)
-    *bswap = false;
-  else if (n->n == cmpxchg)
-    *bswap = true;
-  else
-    return NULL;
-
-  /* Useless bit manipulation performed by code.  */
-  if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
-    return NULL;
-
-  n->range *= BITS_PER_UNIT;
-  return ins_stmt;
-}
-
-namespace {
-
-const pass_data pass_data_optimize_bswap =
-{
-  GIMPLE_PASS, /* type */
-  "bswap", /* name */
-  OPTGROUP_NONE, /* optinfo_flags */
-  TV_NONE, /* tv_id */
-  PROP_ssa, /* properties_required */
-  0, /* properties_provided */
-  0, /* properties_destroyed */
-  0, /* todo_flags_start */
-  0, /* todo_flags_finish */
-};
-
-class pass_optimize_bswap : public gimple_opt_pass
-{
-public:
-  pass_optimize_bswap (gcc::context *ctxt)
-    : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
-  {}
-
-  /* opt_pass methods: */
-  virtual bool gate (function *)
-    {
-      return flag_expensive_optimizations && optimize;
-    }
-
-  virtual unsigned int execute (function *);
-
-}; // class pass_optimize_bswap
-
-/* Perform the bswap optimization: replace the expression computed in the rhs
-   of CUR_STMT by an equivalent bswap, load or load + bswap expression.
-   Which of these alternatives replaces the rhs is given by N->base_addr (non
-   null if a load is needed) and BSWAP.  The type, VUSE and alias set of the
-   load to perform are also given in N while the builtin bswap invocation is
-   given in FNDECL.  Finally, if a load is involved, INS_STMT refers to one of
-   the load statements involved to construct the rhs in CUR_STMT and N->range
-   gives the size of the rhs expression for maintaining some statistics.
-
-   Note that if the replacement involves a load, CUR_STMT is moved just before
-   INS_STMT to do the load with the same VUSE, which can lead to CUR_STMT
-   changing basic blocks.  */
-
-static bool
-bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl,
-	       tree bswap_type, tree load_type, struct symbolic_number *n,
-	       bool bswap)
-{
-  gimple_stmt_iterator gsi;
-  tree src, tmp, tgt;
-  gimple *bswap_stmt;
-
-  gsi = gsi_for_stmt (cur_stmt);
-  src = n->src;
-  tgt = gimple_assign_lhs (cur_stmt);
-
-  /* Need to load the value from memory first.  */
-  if (n->base_addr)
-    {
-      gimple_stmt_iterator gsi_ins = gsi_for_stmt (ins_stmt);
-      tree addr_expr, addr_tmp, val_expr, val_tmp;
-      tree load_offset_ptr, aligned_load_type;
-      gimple *addr_stmt, *load_stmt;
-      unsigned align;
-      HOST_WIDE_INT load_offset = 0;
-      basic_block ins_bb, cur_bb;
-
-      ins_bb = gimple_bb (ins_stmt);
-      cur_bb = gimple_bb (cur_stmt);
-      if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
-	return false;
-
-      align = get_object_alignment (src);
-
-      /* Move cur_stmt just before one of the loads of the original
-	 expression to ensure it has the same VUSE.  See PR61517 for what
-	 could go wrong.  */
-      if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
-	reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
-      gsi_move_before (&gsi, &gsi_ins);
-      gsi = gsi_for_stmt (cur_stmt);
-
-      /* Compute address to load from and cast according to the size
-	 of the load.  */
-      addr_expr = build_fold_addr_expr (unshare_expr (src));
-      if (is_gimple_mem_ref_addr (addr_expr))
-	addr_tmp = addr_expr;
-      else
-	{
-	  addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
-					 "load_src");
-	  addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
-	  gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
-	}
-
-      /* Perform the load.  */
-      aligned_load_type = load_type;
-      if (align < TYPE_ALIGN (load_type))
-	aligned_load_type = build_aligned_type (load_type, align);
-      load_offset_ptr = build_int_cst (n->alias_set, load_offset);
-      val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
-			      load_offset_ptr);
-
-      if (!bswap)
-	{
-	  if (n->range == 16)
-	    nop_stats.found_16bit++;
-	  else if (n->range == 32)
-	    nop_stats.found_32bit++;
-	  else
-	    {
-	      gcc_assert (n->range == 64);
-	      nop_stats.found_64bit++;
-	    }
-
-	  /* Convert the result of load if necessary.  */
-	  if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
-	    {
-	      val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
-					    "load_dst");
-	      load_stmt = gimple_build_assign (val_tmp, val_expr);
-	      gimple_set_vuse (load_stmt, n->vuse);
-	      gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
-	      gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
-	    }
-	  else
-	    {
-	      gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
-	      gimple_set_vuse (cur_stmt, n->vuse);
-	    }
-	  update_stmt (cur_stmt);
-
-	  if (dump_file)
-	    {
-	      fprintf (dump_file,
-		       "%d bit load in target endianness found at: ",
-		       (int) n->range);
-	      print_gimple_stmt (dump_file, cur_stmt, 0);
-	    }
-	  return true;
-	}
-      else
-	{
-	  val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
-	  load_stmt = gimple_build_assign (val_tmp, val_expr);
-	  gimple_set_vuse (load_stmt, n->vuse);
-	  gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
-	}
-      src = val_tmp;
-    }
-  else if (!bswap)
-    {
-      gimple *g;
-      if (!useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
-	{
-	  if (!is_gimple_val (src))
-	    return false;
-	  g = gimple_build_assign (tgt, NOP_EXPR, src);
-	}
-      else
-	g = gimple_build_assign (tgt, src);
-      if (n->range == 16)
-	nop_stats.found_16bit++;
-      else if (n->range == 32)
-	nop_stats.found_32bit++;
-      else
-	{
-	  gcc_assert (n->range == 64);
-	  nop_stats.found_64bit++;
-	}
-      if (dump_file)
-	{
-	  fprintf (dump_file,
-		   "%d bit reshuffle in target endianness found at: ",
-		   (int) n->range);
-	  print_gimple_stmt (dump_file, cur_stmt, 0);
-	}
-      gsi_replace (&gsi, g, true);
-      return true;
-    }
-  else if (TREE_CODE (src) == BIT_FIELD_REF)
-    src = TREE_OPERAND (src, 0);
-
-  if (n->range == 16)
-    bswap_stats.found_16bit++;
-  else if (n->range == 32)
-    bswap_stats.found_32bit++;
-  else
-    {
-      gcc_assert (n->range == 64);
-      bswap_stats.found_64bit++;
-    }
-
-  tmp = src;
-
-  /* Convert the src expression if necessary.  */
-  if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
-    {
-      gimple *convert_stmt;
-
-      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
-      convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
-      gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
-    }
-
-  /* Canonical form for 16 bit bswap is a rotate expression.  Only 16-bit
-     values are considered, as rotation of 2N bit values by N bits is generally
-     not equivalent to a bswap.  Consider for instance 0x01020304 r>> 16 which
-     gives 0x03040102 while a bswap for that value is 0x04030201.  */
-  if (bswap && n->range == 16)
-    {
-      tree count = build_int_cst (NULL, BITS_PER_UNIT);
-      src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
-      bswap_stmt = gimple_build_assign (NULL, src);
-    }
-  else
-    bswap_stmt = gimple_build_call (fndecl, 1, tmp);
-
-  tmp = tgt;
-
-  /* Convert the result if necessary.  */
-  if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
-    {
-      gimple *convert_stmt;
-
-      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
-      convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
-      gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
-    }
-
-  gimple_set_lhs (bswap_stmt, tmp);
-
-  if (dump_file)
-    {
-      fprintf (dump_file, "%d bit bswap implementation found at: ",
-	       (int) n->range);
-      print_gimple_stmt (dump_file, cur_stmt, 0);
-    }
-
-  gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
-  gsi_remove (&gsi, true);
-  return true;
-}
-
-/* Find manual byte swap implementations as well as loads in a given
-   endianness.  Byte swaps are turned into a bswap builtin invocation
-   while endian loads are converted to a bswap builtin invocation or a
-   simple load according to the target endianness.  */
-
-unsigned int
-pass_optimize_bswap::execute (function *fun)
-{
-  basic_block bb;
-  bool bswap32_p, bswap64_p;
-  bool changed = false;
-  tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
-
-  if (BITS_PER_UNIT != 8)
-    return 0;
-
-  bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
-	       && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
-  bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
-	       && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
-		   || (bswap32_p && word_mode == SImode)));
-
-  /* Determine the argument type of the builtins.  The code later on
-     assumes that the return and argument type are the same.  */
-  if (bswap32_p)
-    {
-      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
-      bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
-    }
-
-  if (bswap64_p)
-    {
-      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
-      bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
-    }
-
-  memset (&nop_stats, 0, sizeof (nop_stats));
-  memset (&bswap_stats, 0, sizeof (bswap_stats));
-  calculate_dominance_info (CDI_DOMINATORS);
-
-  FOR_EACH_BB_FN (bb, fun)
-    {
-      gimple_stmt_iterator gsi;
-
-      /* We do a reverse scan for bswap patterns to make sure we get the
-	 widest match. As bswap pattern matching doesn't handle previously
-	 inserted smaller bswap replacements as sub-patterns, the wider
-	 variant wouldn't be detected.  */
-      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
-        {
-	  gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
-	  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
-	  enum tree_code code;
-	  struct symbolic_number n;
-	  bool bswap;
-
-	  /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
-	     might be moved to a different basic block by bswap_replace and gsi
-	     must not points to it if that's the case.  Moving the gsi_prev
-	     must not point to it if that's the case.  Moving the gsi_prev
-	     there makes sure that gsi points to the statement previous to
-	     considered in this basic block.  */
-	  gsi_prev (&gsi);
-
-	  if (!is_gimple_assign (cur_stmt))
-	    continue;
-
-	  code = gimple_assign_rhs_code (cur_stmt);
-	  switch (code)
-	    {
-	    case LROTATE_EXPR:
-	    case RROTATE_EXPR:
-	      if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
-		  || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
-		     % BITS_PER_UNIT)
-		continue;
-	      /* Fall through.  */
-	    case BIT_IOR_EXPR:
-	      break;
-	    default:
-	      continue;
-	    }
-
-	  ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
-
-	  if (!ins_stmt)
-	    continue;
-
-	  switch (n.range)
-	    {
-	    case 16:
-	      /* Already in canonical form, nothing to do.  */
-	      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
-		continue;
-	      load_type = bswap_type = uint16_type_node;
-	      break;
-	    case 32:
-	      load_type = uint32_type_node;
-	      if (bswap32_p)
-		{
-		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
-		  bswap_type = bswap32_type;
-		}
-	      break;
-	    case 64:
-	      load_type = uint64_type_node;
-	      if (bswap64_p)
-		{
-		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
-		  bswap_type = bswap64_type;
-		}
-	      break;
-	    default:
-	      continue;
-	    }
-
-	  if (bswap && !fndecl && n.range != 16)
-	    continue;
-
-	  if (bswap_replace (cur_stmt, ins_stmt, fndecl, bswap_type, load_type,
-			     &n, bswap))
-	    changed = true;
-	}
-    }
-
-  statistics_counter_event (fun, "16-bit nop implementations found",
-			    nop_stats.found_16bit);
-  statistics_counter_event (fun, "32-bit nop implementations found",
-			    nop_stats.found_32bit);
-  statistics_counter_event (fun, "64-bit nop implementations found",
-			    nop_stats.found_64bit);
-  statistics_counter_event (fun, "16-bit bswap implementations found",
-			    bswap_stats.found_16bit);
-  statistics_counter_event (fun, "32-bit bswap implementations found",
-			    bswap_stats.found_32bit);
-  statistics_counter_event (fun, "64-bit bswap implementations found",
-			    bswap_stats.found_64bit);
-
-  return (changed ? TODO_update_ssa : 0);
-}
-
-} // anon namespace
-
-gimple_opt_pass *
-make_pass_optimize_bswap (gcc::context *ctxt)
-{
-  return new pass_optimize_bswap (ctxt);
-}
-
 /* Return true if stmt is a type conversion operation that can be stripped
    when used in a widening multiply operation.  */
 static bool
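
A side note on the 16-bit canonical form used in bswap_replace above: a
2-byte swap is the same operation as a rotate by 8 bits, which is why the
pass emits an LROTATE_EXPR there instead of a builtin call, e.g. (sketch):

static unsigned short
swap16 (unsigned short x)	/* illustrative; same as __builtin_bswap16 */
{
  return (unsigned short) ((x << 8) | (x >> 8));
}

For wider types rotation is not equivalent: 0x01020304 rotated right by 16
gives 0x03040102, not the bswap result 0x04030201.
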
--- gcc/gimple-ssa-store-merging.c.jj	2017-11-13 11:25:26.000000000 +0100
+++ gcc/gimple-ssa-store-merging.c	2017-11-14 17:19:27.573626793 +0100
@@ -1,5 +1,5 @@
-/* GIMPLE store merging pass.
-   Copyright (C) 2016-2017 Free Software Foundation, Inc.
+/* GIMPLE store merging and byte swapping passes.
+   Copyright (C) 2009-2017 Free Software Foundation, Inc.
    Contributed by ARM Ltd.
 
    This file is part of GCC.
@@ -18,8 +18,8 @@
    along with GCC; see the file COPYING3.  If not see
    <http://www.gnu.org/licenses/>.  */
 
-/* The purpose of this pass is to combine multiple memory stores of
-   constant values, values loaded from memory or bitwise operations
+/* The purpose of the store merging pass is to combine multiple memory
+   stores of constant values, values loaded from memory or bitwise operations
    on those to consecutive memory locations into fewer wider stores.
    For example, if we have a sequence performing four byte stores to
    consecutive memory locations:
@@ -157,6 +157,7 @@
 #include "gimplify-me.h"
 #include "rtl.h"
 #include "expr.h"	/* For get_bit_range.  */
+#include "optabs-tree.h"
 #include "selftest.h"
 
 /* The maximum size (in bits) of the stores this pass should generate.  */
@@ -169,6 +170,1079 @@
 
 namespace {
 
+struct
+{
+  /* Number of hand-written 16-bit nop / bswaps found.  */
+  int found_16bit;
+
+  /* Number of hand-written 32-bit nop / bswaps found.  */
+  int found_32bit;
+
+  /* Number of hand-written 64-bit nop / bswaps found.  */
+  int found_64bit;
+} nop_stats, bswap_stats;
+
+/* A symbolic number structure is used to detect byte permutation and selection
+   patterns of a source.  To achieve that, its field N contains an artificial
+   number consisting of BITS_PER_MARKER sized markers tracking where each
+   byte comes from in the source:
+
+   0	   - target byte has the value 0
+   FF	   - target byte has an unknown value (e.g. due to sign extension)
+   1..size - marker value is the byte index in the source (0 for lsb).
+
+   To detect permutations on memory sources (arrays and structures), the
+   following is also associated with a symbolic number:
+   - a base address BASE_ADDR and an OFFSET giving the address of the source;
+   - a range which gives the difference between the highest and lowest accessed
+     memory location to make such a symbolic number;
+   - the address SRC of the source element of lowest address as a convenience
+     to easily get BASE_ADDR + offset + lowest bytepos;
+   - number of expressions N_OPS bitwise ored together to represent
+     approximate cost of the computation.
+
+   Note 1: the range is different from size as size reflects the size of the
+   type of the current expression.  For instance, for an array char a[],
+   (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
+   (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
+   time a range of 1.
+
+   Note 2: for non-memory sources, range holds the same value as size.
+
+   Note 3: SRC points to the SSA_NAME in case of non-memory source.  */
+
+struct symbolic_number {
+  uint64_t n;
+  tree type;
+  tree base_addr;
+  tree offset;
+  HOST_WIDE_INT bytepos;
+  tree src;
+  tree alias_set;
+  tree vuse;
+  unsigned HOST_WIDE_INT range;
+  int n_ops;
+};
+
+#define BITS_PER_MARKER 8
+#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
+#define MARKER_BYTE_UNKNOWN MARKER_MASK
+#define HEAD_MARKER(n, size) \
+  ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
+
+/* The number which the find_bswap_or_nop_1 result should match in
+   order to have a nop.  The number is masked according to the size of
+   the symbolic number before using it.  */
+#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
+  (uint64_t)0x08070605 << 32 | 0x04030201)
+
+/* The number which the find_bswap_or_nop_1 result should match in
+   order to have a byte swap.  The number is masked according to the
+   size of the symbolic number before using it.  */
+#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
+  (uint64_t)0x01020304 << 32 | 0x05060708)
+
+/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
+   number N.  Return false if the requested operation is not permitted
+   on a symbolic number.  */
+
+inline bool
+do_shift_rotate (enum tree_code code,
+		 struct symbolic_number *n,
+		 int count)
+{
+  int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
+  unsigned head_marker;
+
+  if (count % BITS_PER_UNIT != 0)
+    return false;
+  count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
+
+  /* Zero out the extra bits of N in order to avoid them being shifted
+     into the significant bits.  */
+  if (size < 64 / BITS_PER_MARKER)
+    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
+
+  switch (code)
+    {
+    case LSHIFT_EXPR:
+      n->n <<= count;
+      break;
+    case RSHIFT_EXPR:
+      head_marker = HEAD_MARKER (n->n, size);
+      n->n >>= count;
+      /* Arithmetic shift of signed type: result is dependent on the value.  */
+      if (!TYPE_UNSIGNED (n->type) && head_marker)
+	for (i = 0; i < count / BITS_PER_MARKER; i++)
+	  n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
+		  << ((size - 1 - i) * BITS_PER_MARKER);
+      break;
+    case LROTATE_EXPR:
+      n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
+      break;
+    case RROTATE_EXPR:
+      n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
+      break;
+    default:
+      return false;
+    }
+  /* Zero unused bits for size.  */
+  if (size < 64 / BITS_PER_MARKER)
+    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
+  return true;
+}
+
+/* Perform sanity checking for the symbolic number N and the gimple
+   statement STMT.  */
+
+inline bool
+verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
+{
+  tree lhs_type;
+
+  lhs_type = gimple_expr_type (stmt);
+
+  if (TREE_CODE (lhs_type) != INTEGER_TYPE)
+    return false;
+
+  if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
+    return false;
+
+  return true;
+}
+
+/* Initialize the symbolic number N for the bswap pass from the base element
+   SRC manipulated by the bitwise OR expression.  */
+
+bool
+init_symbolic_number (struct symbolic_number *n, tree src)
+{
+  int size;
+
+  if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
+    return false;
+
+  n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
+  n->src = src;
+
+  /* Set up the symbolic number N by setting each byte to a value between 1 and
+     the byte size of rhs1.  The highest order byte is set to n->size and the
+     lowest order byte to 1.  */
+  n->type = TREE_TYPE (src);
+  size = TYPE_PRECISION (n->type);
+  if (size % BITS_PER_UNIT != 0)
+    return false;
+  size /= BITS_PER_UNIT;
+  if (size > 64 / BITS_PER_MARKER)
+    return false;
+  n->range = size;
+  n->n = CMPNOP;
+  n->n_ops = 1;
+
+  if (size < 64 / BITS_PER_MARKER)
+    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
+
+  return true;
+}
+
+/* Check if STMT might be a byte swap or a nop from a memory source and return
+   the answer.  If so, REF is that memory source and the base of the memory area
+   accessed and the offset of the access from that base are recorded in N.  */
+
+bool
+find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
+{
+  /* Leaf node is an array or component ref. Memorize its base and
+     offset from base to compare to other such leaf nodes.  */
+  HOST_WIDE_INT bitsize, bitpos;
+  machine_mode mode;
+  int unsignedp, reversep, volatilep;
+  tree offset, base_addr;
+
+  /* Not prepared to handle PDP endian.  */
+  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
+    return false;
+
+  if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
+    return false;
+
+  base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
+				   &unsignedp, &reversep, &volatilep);
+
+  if (TREE_CODE (base_addr) == MEM_REF)
+    {
+      offset_int bit_offset = 0;
+      tree off = TREE_OPERAND (base_addr, 1);
+
+      if (!integer_zerop (off))
+	{
+	  offset_int boff, coff = mem_ref_offset (base_addr);
+	  boff = coff << LOG2_BITS_PER_UNIT;
+	  bit_offset += boff;
+	}
+
+      base_addr = TREE_OPERAND (base_addr, 0);
+
+      /* Avoid returning a negative bitpos as this may wreak havoc later.  */
+      if (wi::neg_p (bit_offset))
+	{
+	  offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false);
+	  offset_int tem = wi::bit_and_not (bit_offset, mask);
+	  /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
+	     Subtract it from BIT_OFFSET and add it (scaled) to OFFSET.  */
+	  bit_offset -= tem;
+	  tem >>= LOG2_BITS_PER_UNIT;
+	  if (offset)
+	    offset = size_binop (PLUS_EXPR, offset,
+				    wide_int_to_tree (sizetype, tem));
+	  else
+	    offset = wide_int_to_tree (sizetype, tem);
+	}
+
+      bitpos += bit_offset.to_shwi ();
+    }
+
+  if (bitpos % BITS_PER_UNIT)
+    return false;
+  if (bitsize % BITS_PER_UNIT)
+    return false;
+  if (reversep)
+    return false;
+
+  if (!init_symbolic_number (n, ref))
+    return false;
+  n->base_addr = base_addr;
+  n->offset = offset;
+  n->bytepos = bitpos / BITS_PER_UNIT;
+  n->alias_set = reference_alias_ptr_type (ref);
+  n->vuse = gimple_vuse (stmt);
+  return true;
+}
+
+/* Compute the symbolic number N representing the result of a bitwise OR on
+   two symbolic numbers N1 and N2 whose source statements are respectively
+   SOURCE_STMT1 and SOURCE_STMT2.  */
+
+gimple *
+perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
+			gimple *source_stmt2, struct symbolic_number *n2,
+			struct symbolic_number *n)
+{
+  int i, size;
+  uint64_t mask;
+  gimple *source_stmt;
+  struct symbolic_number *n_start;
+
+  tree rhs1 = gimple_assign_rhs1 (source_stmt1);
+  if (TREE_CODE (rhs1) == BIT_FIELD_REF
+      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
+    rhs1 = TREE_OPERAND (rhs1, 0);
+  tree rhs2 = gimple_assign_rhs1 (source_stmt2);
+  if (TREE_CODE (rhs2) == BIT_FIELD_REF
+      && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
+    rhs2 = TREE_OPERAND (rhs2, 0);
+
+  /* Sources are different, cancel bswap if they are not memory locations with
+     the same base (array, structure, ...).  */
+  if (rhs1 != rhs2)
+    {
+      uint64_t inc;
+      HOST_WIDE_INT start_sub, end_sub, end1, end2, end;
+      struct symbolic_number *toinc_n_ptr, *n_end;
+      basic_block bb1, bb2;
+
+      if (!n1->base_addr || !n2->base_addr
+	  || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
+	return NULL;
+
+      if (!n1->offset != !n2->offset
+	  || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
+	return NULL;
+
+      if (n1->bytepos < n2->bytepos)
+	{
+	  n_start = n1;
+	  start_sub = n2->bytepos - n1->bytepos;
+	}
+      else
+	{
+	  n_start = n2;
+	  start_sub = n1->bytepos - n2->bytepos;
+	}
+
+      bb1 = gimple_bb (source_stmt1);
+      bb2 = gimple_bb (source_stmt2);
+      if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
+	source_stmt = source_stmt1;
+      else
+	source_stmt = source_stmt2;
+
+      /* Find the highest address at which a load is performed and
+	 compute related info.  */
+      end1 = n1->bytepos + (n1->range - 1);
+      end2 = n2->bytepos + (n2->range - 1);
+      if (end1 < end2)
+	{
+	  end = end2;
+	  end_sub = end2 - end1;
+	}
+      else
+	{
+	  end = end1;
+	  end_sub = end1 - end2;
+	}
+      n_end = (end2 > end1) ? n2 : n1;
+
+      /* Find symbolic number whose lsb is the most significant.  */
+      if (BYTES_BIG_ENDIAN)
+	toinc_n_ptr = (n_end == n1) ? n2 : n1;
+      else
+	toinc_n_ptr = (n_start == n1) ? n2 : n1;
+
+      n->range = end - n_start->bytepos + 1;
+
+      /* Check that the range of memory covered can be represented by
+	 a symbolic number.  */
+      if (n->range > 64 / BITS_PER_MARKER)
+	return NULL;
+
+      /* Reinterpret byte marks in symbolic number holding the value of
+	 bigger weight according to target endianness.  */
+      inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
+      size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
+      for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
+	{
+	  unsigned marker
+	    = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
+	  if (marker && marker != MARKER_BYTE_UNKNOWN)
+	    toinc_n_ptr->n += inc;
+	}
+    }
+  else
+    {
+      n->range = n1->range;
+      n_start = n1;
+      source_stmt = source_stmt1;
+    }
+
+  if (!n1->alias_set
+      || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
+    n->alias_set = n1->alias_set;
+  else
+    n->alias_set = ptr_type_node;
+  n->vuse = n_start->vuse;
+  n->base_addr = n_start->base_addr;
+  n->offset = n_start->offset;
+  n->src = n_start->src;
+  n->bytepos = n_start->bytepos;
+  n->type = n_start->type;
+  size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
+
+  for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
+    {
+      uint64_t masked1, masked2;
+
+      masked1 = n1->n & mask;
+      masked2 = n2->n & mask;
+      if (masked1 && masked2 && masked1 != masked2)
+	return NULL;
+    }
+  n->n = n1->n | n2->n;
+  n->n_ops = n1->n_ops + n2->n_ops;
+
+  return source_stmt;
+}
+
+/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
+   the operation given by the rhs of STMT on the result.  If the operation
+   could successfully be executed, the function returns a gimple stmt whose
+   rhs's first tree is the expression of the source operand, and NULL
+   otherwise.  */
+
+gimple *
+find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
+{
+  enum tree_code code;
+  tree rhs1, rhs2 = NULL;
+  gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
+  enum gimple_rhs_class rhs_class;
+
+  if (!limit || !is_gimple_assign (stmt))
+    return NULL;
+
+  rhs1 = gimple_assign_rhs1 (stmt);
+
+  if (find_bswap_or_nop_load (stmt, rhs1, n))
+    return stmt;
+
+  /* Handle BIT_FIELD_REF.  */
+  if (TREE_CODE (rhs1) == BIT_FIELD_REF
+      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
+    {
+      unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
+      unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
+      if (bitpos % BITS_PER_UNIT == 0
+	  && bitsize % BITS_PER_UNIT == 0
+	  && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
+	{
+	  /* Handle big-endian bit numbering in BIT_FIELD_REF.  */
+	  if (BYTES_BIG_ENDIAN)
+	    bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
+
+	  /* Shift.  */
+	  if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
+	    return NULL;
+
+	  /* Mask.  */
+	  uint64_t mask = 0;
+	  uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
+	  for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
+	       i++, tmp <<= BITS_PER_UNIT)
+	    mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
+	  n->n &= mask;
+
+	  /* Convert.  */
+	  n->type = TREE_TYPE (rhs1);
+	  if (!n->base_addr)
+	    n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
+
+	  return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
+	}
+
+      return NULL;
+    }
+
+  if (TREE_CODE (rhs1) != SSA_NAME)
+    return NULL;
+
+  code = gimple_assign_rhs_code (stmt);
+  rhs_class = gimple_assign_rhs_class (stmt);
+  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
+
+  if (rhs_class == GIMPLE_BINARY_RHS)
+    rhs2 = gimple_assign_rhs2 (stmt);
+
+  /* Handle unary rhs and binary rhs with integer constants as second
+     operand.  */
+
+  if (rhs_class == GIMPLE_UNARY_RHS
+      || (rhs_class == GIMPLE_BINARY_RHS
+	  && TREE_CODE (rhs2) == INTEGER_CST))
+    {
+      if (code != BIT_AND_EXPR
+	  && code != LSHIFT_EXPR
+	  && code != RSHIFT_EXPR
+	  && code != LROTATE_EXPR
+	  && code != RROTATE_EXPR
+	  && !CONVERT_EXPR_CODE_P (code))
+	return NULL;
+
+      source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
+
+      /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
+	 we have to initialize the symbolic number.  */
+      if (!source_stmt1)
+	{
+	  if (gimple_assign_load_p (stmt)
+	      || !init_symbolic_number (n, rhs1))
+	    return NULL;
+	  source_stmt1 = stmt;
+	}
+
+      switch (code)
+	{
+	case BIT_AND_EXPR:
+	  {
+	    int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
+	    uint64_t val = int_cst_value (rhs2), mask = 0;
+	    uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
+
+	    /* Only constants masking full bytes are allowed.  */
+	    for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
+	      if ((val & tmp) != 0 && (val & tmp) != tmp)
+		return NULL;
+	      else if (val & tmp)
+		mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
+
+	    n->n &= mask;
+	  }
+	  break;
+	case LSHIFT_EXPR:
+	case RSHIFT_EXPR:
+	case LROTATE_EXPR:
+	case RROTATE_EXPR:
+	  if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
+	    return NULL;
+	  break;
+	CASE_CONVERT:
+	  {
+	    int i, type_size, old_type_size;
+	    tree type;
+
+	    type = gimple_expr_type (stmt);
+	    type_size = TYPE_PRECISION (type);
+	    if (type_size % BITS_PER_UNIT != 0)
+	      return NULL;
+	    type_size /= BITS_PER_UNIT;
+	    if (type_size > 64 / BITS_PER_MARKER)
+	      return NULL;
+
+	    /* Sign extension: result is dependent on the value.  */
+	    old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
+	    if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
+		&& HEAD_MARKER (n->n, old_type_size))
+	      for (i = 0; i < type_size - old_type_size; i++)
+		n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
+			<< ((type_size - 1 - i) * BITS_PER_MARKER);
+
+	    if (type_size < 64 / BITS_PER_MARKER)
+	      {
+		/* If STMT casts to a smaller type mask out the bits not
+		   belonging to the target type.  */
+		n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
+	      }
+	    n->type = type;
+	    if (!n->base_addr)
+	      n->range = type_size;
+	  }
+	  break;
+	default:
+	  return NULL;
+	};
+      return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
+    }
+
+  /* Handle binary rhs.  */
+
+  if (rhs_class == GIMPLE_BINARY_RHS)
+    {
+      struct symbolic_number n1, n2;
+      gimple *source_stmt, *source_stmt2;
+
+      if (code != BIT_IOR_EXPR)
+	return NULL;
+
+      if (TREE_CODE (rhs2) != SSA_NAME)
+	return NULL;
+
+      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
+
+      switch (code)
+	{
+	case BIT_IOR_EXPR:
+	  source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
+
+	  if (!source_stmt1)
+	    return NULL;
+
+	  source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
+
+	  if (!source_stmt2)
+	    return NULL;
+
+	  if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
+	    return NULL;
+
+	  if (!n1.vuse != !n2.vuse
+	      || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
+	    return NULL;
+
+	  source_stmt
+	    = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
+
+	  if (!source_stmt)
+	    return NULL;
+
+	  if (!verify_symbolic_number_p (n, stmt))
+	    return NULL;
+
+	  break;
+	default:
+	  return NULL;
+	}
+      return source_stmt;
+    }
+  return NULL;
+}
+
+/* Check if STMT completes a bswap implementation or a read in a given
+   endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
+   accordingly.  It also sets N to represent the kind of operations
+   performed: size of the resulting expression and whether it works on
+   a memory source, and if so its alias set and vuse.  Finally, the
+   function returns a stmt whose rhs's first tree is the source
+   expression.  */
+
+gimple *
+find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
+{
+  unsigned rsize;
+  uint64_t tmpn, mask;
+/* The number which the find_bswap_or_nop_1 result should match in order
+   to have a full byte swap.  The number is shifted to the right
+   according to the size of the symbolic number before using it.  */
+  uint64_t cmpxchg = CMPXCHG;
+  uint64_t cmpnop = CMPNOP;
+
+  gimple *ins_stmt;
+  int limit;
+
+  /* The last parameter determines the search depth limit.  It usually
+     correlates directly to the number n of bytes to be touched.  We
+     increase that number by log2(n) + 1 here in order to also
+     cover signed -> unsigned conversions of the src operand as can be seen
+     in libgcc, and for initial shift/and operation of the src operand.  */
+  limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
+  limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
+  ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
+
+  if (!ins_stmt)
+    return NULL;
+
+  /* Find real size of result (highest non-zero byte).  */
+  if (n->base_addr)
+    for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
+  else
+    rsize = n->range;
+
+  /* Zero out the bits corresponding to untouched bytes in original gimple
+     expression.  */
+  if (n->range < (int) sizeof (int64_t))
+    {
+      mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
+      cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
+      cmpnop &= mask;
+    }
+
+  /* Zero out the bits corresponding to unused bytes in the result of the
+     gimple expression.  */
+  if (rsize < n->range)
+    {
+      if (BYTES_BIG_ENDIAN)
+	{
+	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
+	  cmpxchg &= mask;
+	  cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
+	}
+      else
+	{
+	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
+	  cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
+	  cmpnop &= mask;
+	}
+      n->range = rsize;
+    }
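+
+  /* E.g. if a 4-byte pattern only produces a 2-byte result (rsize == 2),
+     on a little-endian target this leaves cmpxchg == 0x0102 and
+     cmpnop == 0x0201: only the markers of the used low bytes remain.  */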
+
+  /* A complete byte swap should make the symbolic number start with
+     the largest digit in the highest order byte.  An unchanged symbolic
+     number indicates a read with the same endianness as the target
+     architecture.  */
+  if (n->n == cmpnop)
+    *bswap = false;
+  else if (n->n == cmpxchg)
+    *bswap = true;
+  else
+    return NULL;
+
+  /* A no-op computed by a single operation from a non-memory source is
+     just useless bit manipulation; don't replace it.  */
+  if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
+    return NULL;
+
+  n->range *= BITS_PER_UNIT;
+  return ins_stmt;
+}
+
+const pass_data pass_data_optimize_bswap =
+{
+  GIMPLE_PASS, /* type */
+  "bswap", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  PROP_ssa, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_optimize_bswap : public gimple_opt_pass
+{
+public:
+  pass_optimize_bswap (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *)
+    {
+      return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
+    }
+
+  virtual unsigned int execute (function *);
+
+}; // class pass_optimize_bswap
+
+/* Perform the bswap optimization: replace the expression computed in the rhs
+   of CUR_STMT by an equivalent bswap, load or load + bswap expression.
+   Which of these alternatives replaces the rhs is given by N->base_addr
+   (non-null if a load is needed) and BSWAP.  The type, VUSE and alias set of
+   the load to perform are also given in N while the bswap builtin to invoke
+   is given in FNDECL.  Finally, if a load is involved, INS_STMT refers to one
+   of the load statements involved to construct the rhs in CUR_STMT and
+   N->range gives the size of the rhs expression for maintaining some
+   statistics.
+
+   Note that if the replacement involves a load, CUR_STMT is moved just
+   before INS_STMT to do the load with the same VUSE, which can lead to
+   CUR_STMT changing basic block.  */
+
+bool
+bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl,
+	       tree bswap_type, tree load_type, struct symbolic_number *n,
+	       bool bswap)
+{
+  gimple_stmt_iterator gsi;
+  tree src, tmp, tgt;
+  gimple *bswap_stmt;
+
+  gsi = gsi_for_stmt (cur_stmt);
+  src = n->src;
+  tgt = gimple_assign_lhs (cur_stmt);
+
+  /* Need to load the value from memory first.  */
+  if (n->base_addr)
+    {
+      gimple_stmt_iterator gsi_ins = gsi_for_stmt (ins_stmt);
+      tree addr_expr, addr_tmp, val_expr, val_tmp;
+      tree load_offset_ptr, aligned_load_type;
+      gimple *addr_stmt, *load_stmt;
+      unsigned align;
+      HOST_WIDE_INT load_offset = 0;
+      basic_block ins_bb, cur_bb;
+
+      ins_bb = gimple_bb (ins_stmt);
+      cur_bb = gimple_bb (cur_stmt);
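+
+      /* cur_stmt is about to be moved just before ins_stmt, so that move
+	 is only safe when ins_stmt's block dominates cur_stmt's: moving a
+	 definition to a dominating block keeps it dominating all of its
+	 uses.  */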
+      if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
+	return false;
+
+      align = get_object_alignment (src);
+
+      /* Move cur_stmt just before one of the loads of the original
+	 sequence to ensure it has the same VUSE.  See PR61517 for what
+	 could go wrong.  */
+      if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
+	reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
+      gsi_move_before (&gsi, &gsi_ins);
+      gsi = gsi_for_stmt (cur_stmt);
+
+      /* Compute address to load from and cast according to the size
+	 of the load.  */
+      addr_expr = build_fold_addr_expr (unshare_expr (src));
+      if (is_gimple_mem_ref_addr (addr_expr))
+	addr_tmp = addr_expr;
+      else
+	{
+	  addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
+					 "load_src");
+	  addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
+	  gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
+	}
+
+      /* Perform the load.  */
+      aligned_load_type = load_type;
+      if (align < TYPE_ALIGN (load_type))
+	aligned_load_type = build_aligned_type (load_type, align);
+      load_offset_ptr = build_int_cst (n->alias_set, load_offset);
+      val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
+			      load_offset_ptr);
+
+      if (!bswap)
+	{
+	  if (n->range == 16)
+	    nop_stats.found_16bit++;
+	  else if (n->range == 32)
+	    nop_stats.found_32bit++;
+	  else
+	    {
+	      gcc_assert (n->range == 64);
+	      nop_stats.found_64bit++;
+	    }
+
+	  /* Convert the result of load if necessary.  */
+	  if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
+	    {
+	      val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
+					    "load_dst");
+	      load_stmt = gimple_build_assign (val_tmp, val_expr);
+	      gimple_set_vuse (load_stmt, n->vuse);
+	      gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
+	      gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
+	    }
+	  else
+	    {
+	      gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
+	      gimple_set_vuse (cur_stmt, n->vuse);
+	    }
+	  update_stmt (cur_stmt);
+
+	  if (dump_file)
+	    {
+	      fprintf (dump_file,
+		       "%d bit load in target endianness found at: ",
+		       (int) n->range);
+	      print_gimple_stmt (dump_file, cur_stmt, 0);
+	    }
+	  return true;
+	}
+      else
+	{
+	  val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
+	  load_stmt = gimple_build_assign (val_tmp, val_expr);
+	  gimple_set_vuse (load_stmt, n->vuse);
+	  gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
+	}
+      src = val_tmp;
+    }
+  else if (!bswap)
+    {
+      gimple *g;
+      if (!useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
+	{
+	  if (!is_gimple_val (src))
+	    return false;
+	  g = gimple_build_assign (tgt, NOP_EXPR, src);
+	}
+      else
+	g = gimple_build_assign (tgt, src);
+      if (n->range == 16)
+	nop_stats.found_16bit++;
+      else if (n->range == 32)
+	nop_stats.found_32bit++;
+      else
+	{
+	  gcc_assert (n->range == 64);
+	  nop_stats.found_64bit++;
+	}
+      if (dump_file)
+	{
+	  fprintf (dump_file,
+		   "%d bit reshuffle in target endianness found at: ",
+		   (int) n->range);
+	  print_gimple_stmt (dump_file, cur_stmt, 0);
+	}
+      gsi_replace (&gsi, g, true);
+      return true;
+    }
+  else if (TREE_CODE (src) == BIT_FIELD_REF)
+    src = TREE_OPERAND (src, 0);
+
+  if (n->range == 16)
+    bswap_stats.found_16bit++;
+  else if (n->range == 32)
+    bswap_stats.found_32bit++;
+  else
+    {
+      gcc_assert (n->range == 64);
+      bswap_stats.found_64bit++;
+    }
+
+  tmp = src;
+
+  /* Convert the src expression if necessary.  */
+  if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
+    {
+      gimple *convert_stmt;
+
+      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
+      convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
+      gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
+    }
+
+  /* Canonical form for 16 bit bswap is a rotate expression.  Only 16bit values
+     are considered as rotation of 2N bit values by N bits is generally not
+     equivalent to a bswap.  Consider for instance 0x01020304 r>> 16 which
+     gives 0x03040102 while a bswap for that value is 0x04030201.  */
+  if (bswap && n->range == 16)
+    {
+      tree count = build_int_cst (NULL, BITS_PER_UNIT);
+      src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
+      bswap_stmt = gimple_build_assign (NULL, src);
+    }
+  else
+    bswap_stmt = gimple_build_call (fndecl, 1, tmp);
+
+  tmp = tgt;
+
+  /* Convert the result if necessary.  */
+  if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
+    {
+      gimple *convert_stmt;
+
+      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
+      convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
+      gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
+    }
+
+  gimple_set_lhs (bswap_stmt, tmp);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "%d bit bswap implementation found at: ",
+	       (int) n->range);
+      print_gimple_stmt (dump_file, cur_stmt, 0);
+    }
+
+  gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
+  gsi_remove (&gsi, true);
+  return true;
+}
+
+/* Find manual byte swap implementations as well as loads in a given
+   endianness.  Byte swaps are turned into a bswap builtin invocation,
+   while endian loads are converted to a bswap builtin invocation or a
+   simple load according to the target endianness.  */
+
+unsigned int
+pass_optimize_bswap::execute (function *fun)
+{
+  basic_block bb;
+  bool bswap32_p, bswap64_p;
+  bool changed = false;
+  tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
+
+  bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
+	       && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
+  bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
+	       && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
+		   || (bswap32_p && word_mode == SImode)));
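+
+  /* Even without a direct DImode bswap optab, a 64-bit bswap can still be
+     expanded as two word-sized bswaps when word_mode is SImode and a
+     32-bit bswap is available.  */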
+
+  /* Determine the argument type of the builtins.  The code later on
+     assumes that the return and argument type are the same.  */
+  if (bswap32_p)
+    {
+      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
+      bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
+    }
+
+  if (bswap64_p)
+    {
+      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
+      bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
+    }
+
+  memset (&nop_stats, 0, sizeof (nop_stats));
+  memset (&bswap_stats, 0, sizeof (bswap_stats));
+  calculate_dominance_info (CDI_DOMINATORS);
+
+  FOR_EACH_BB_FN (bb, fun)
+    {
+      gimple_stmt_iterator gsi;
+
+      /* We do a reverse scan for bswap patterns to make sure we get the
+	 widest match. As bswap pattern matching doesn't handle previously
+	 inserted smaller bswap replacements as sub-patterns, the wider
+	 variant wouldn't be detected.  */
+      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
+        {
+	  gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
+	  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
+	  enum tree_code code;
+	  struct symbolic_number n;
+	  bool bswap;
+
+	  /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
+	     might be moved to a different basic block by bswap_replace and gsi
+	     must not point to it if that's the case.  Moving the gsi_prev
+	     here makes sure that gsi points to the statement previous to
+	     cur_stmt while still making sure that all statements are
+	     considered in this basic block.  */
+	  gsi_prev (&gsi);
+
+	  if (!is_gimple_assign (cur_stmt))
+	    continue;
+
+	  code = gimple_assign_rhs_code (cur_stmt);
+	  switch (code)
+	    {
+	    case LROTATE_EXPR:
+	    case RROTATE_EXPR:
+	      if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
+		  || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
+		     % BITS_PER_UNIT)
+		continue;
+	      /* Fall through.  */
+	    case BIT_IOR_EXPR:
+	      break;
+	    default:
+	      continue;
+	    }
+
+	  ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
+
+	  if (!ins_stmt)
+	    continue;
+
+	  switch (n.range)
+	    {
+	    case 16:
+	      /* Already in canonical form, nothing to do.  */
+	      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
+		continue;
+	      load_type = bswap_type = uint16_type_node;
+	      break;
+	    case 32:
+	      load_type = uint32_type_node;
+	      if (bswap32_p)
+		{
+		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
+		  bswap_type = bswap32_type;
+		}
+	      break;
+	    case 64:
+	      load_type = uint64_type_node;
+	      if (bswap64_p)
+		{
+		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
+		  bswap_type = bswap64_type;
+		}
+	      break;
+	    default:
+	      continue;
+	    }
+
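+	  /* The 32- and 64-bit cases need a target bswap builtin; only the
+	     16-bit case can fall back to a rotate.  */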
+	  if (bswap && !fndecl && n.range != 16)
+	    continue;
+
+	  if (bswap_replace (cur_stmt, ins_stmt, fndecl, bswap_type, load_type,
+			     &n, bswap))
+	    changed = true;
+	}
+    }
+
+  statistics_counter_event (fun, "16-bit nop implementations found",
+			    nop_stats.found_16bit);
+  statistics_counter_event (fun, "32-bit nop implementations found",
+			    nop_stats.found_32bit);
+  statistics_counter_event (fun, "64-bit nop implementations found",
+			    nop_stats.found_64bit);
+  statistics_counter_event (fun, "16-bit bswap implementations found",
+			    bswap_stats.found_16bit);
+  statistics_counter_event (fun, "32-bit bswap implementations found",
+			    bswap_stats.found_32bit);
+  statistics_counter_event (fun, "64-bit bswap implementations found",
+			    bswap_stats.found_64bit);
+
+  return (changed ? TODO_update_ssa : 0);
+}
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_optimize_bswap (gcc::context *ctxt)
+{
+  return new pass_optimize_bswap (ctxt);
+}
+
+namespace {
+
 /* Struct recording one operand for the store, which is either a constant,
    then VAL represents the constant and all the other fields are zero,
    or a memory load, then VAL represents the reference, BASE_ADDR is non-NULL

	Jakub

