[PATCH] assorted improvements for fold_truth_andor_1

Alexandre Oliva <oliva@adacore.com>
Fri Sep 25 13:38:17 GMT 2020


This patch introduces various improvements to the logic that merges
field compares.

Before the patch, we could merge:

  (a.x1 EQNE b.x1)  ANDOR  (a.y1 EQNE b.y1)

into something like:

  (((type *)&a)[Na] & MASK) EQNE (((type *)&b)[Nb] & MASK)

if both of A's fields live within the same alignment boundaries, and
so do B's, at the same relative positions.  Constants may be used
instead of the object B.
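
For illustration (this is not one of the patch's testcases), given

  struct S { unsigned char x1; unsigned char y1; } a, b;

  if (a.x1 == b.x1 && a.y1 == b.y1)
    ...

both fields of each object fall within the same aligned word, so the
two compares can be merged into a single masked word compare of the
form above.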

The initial goal of this patch was to enable such combinations when a
field crossed alignment boundaries, e.g. for packed types.  We can't
generally access such fields with a single memory access, so when we
come across such a compare, we will attempt to combine each access
separately.
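
For example (illustrative only), with a packed type such as

  struct P {
    unsigned char p;
    unsigned int a;   /* straddles a 32-bit alignment boundary */
  } __attribute__ ((packed, aligned (4))) u, v;

  if (u.p == v.p && u.a == v.a)
    ...

u.a and v.a can't be loaded as single aligned words; the patch now
combines the compare of the p fields with the parts of the a fields
that fall in the same word, and compares the remaining parts
separately.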

Some merging opportunities were missed because of right-shifts,
compares expressed as e.g. ((a.x1 ^ b.x1) & MASK) EQNE 0, and
narrowing conversions, especially after earlier merges.  This patch
introduces handlers for several cases involving these.
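
Besides plain field compares, shapes like the following (made-up
examples) can now be decoded and merged:

  ((a.x1 ^ b.x1) & MASK) == 0            /* masked xor compared with 0 */
  (unsigned char) (a.w >> 8) == b.y1     /* shift and narrowing conversion */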

Other merging opportunities were missed because of association.  The
existing logic would only succeed in merging a pair of consecutive
compares, or e.g. B with C in (A ANDOR B) ANDOR C, not even trying
e.g. C and D in (A ANDOR (B ANDOR C)) ANDOR D.  I've generalized the
handling: we first try the rightmost compare in the left-hand operand
against the leftmost compare in the right-hand operand, and then go on
to trying to merge compares pairwise, one from each operand, even if
they are not consecutive, taking care not to merge compares when
operations with side effects, including volatile accesses, intervene.
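
For example (again made up, with c being some unrelated,
side-effect-free operand), in

  if ((a.x1 == b.x1 && c != 0) && a.y1 == b.y1)
    ...

the a.x1 and a.y1 compares are not consecutive, but since the
intervening c != 0 test has no side effects they can now be merged,
and the c != 0 test is retained alongside the combined compare.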

When the second compare of a non-consecutive pair is the first one to
access some word, we may merge the first compare with the part of the
second compare that refers to a word the first compare already
accesses, keeping the compare of the remaining bits at the spot where
the second compare used to be.

Handling of compares with non-constant fields was somewhat
generalized, and now copes with non-adjacent fields.  When a field of
one object crosses an alignment boundary but the corresponding field
of the other object doesn't, we issue the same load in both compares;
gimple optimizers will later combine the redundant loads into a single
one, without our having to handle SAVE_EXPRs at this point.

The logic for issuing split loads and compares, and ordering them, is
now shared between all cases of compares with constants and with
another object.

The -Wno-error for toplev.o on rs6000 is because of toplev.c's:

  if ((flag_sanitize & SANITIZE_ADDRESS)
      && !FRAME_GROWS_DOWNWARD)

and rs6000.h's:

#define FRAME_GROWS_DOWNWARD (flag_stack_protect != 0			\
			      || (flag_sanitize & SANITIZE_ADDRESS) != 0)
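
Substituting the macro, the toplev.c test amounts to

  if ((flag_sanitize & SANITIZE_ADDRESS)
      && !(flag_stack_protect != 0
	   || (flag_sanitize & SANITIZE_ADDRESS) != 0))

so the same flag_sanitize test appears on both sides of the && with
opposite polarity, and the condition can never hold.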

The mutually exclusive conditions involving flag_sanitize are now
noticed and reported by fold-const.c's:

	      warning (0,
		       "%<and%> of mutually exclusive equal-tests"
		       " is always 0");

This patch enables over 12k compare-merging opportunities that we used
to miss in a GCC bootstrap.

Regstrapped on x86_64-linux-gnu and ppc64-linux-gnu.  Ok to install?


for  gcc/ChangeLog

	* fold-const.c (prepare_xor): New.
	(decode_field_reference): Handle xor, shift, and narrowing
	conversions.
	(all_ones_mask_p): Remove.
	(compute_split_boundary_from_align): New.
	(build_split_load, reuse_split_load): New.
	(fold_truth_andor_1): Add recursion to combine pairs of
	non-neighboring compares.  Handle xor compared with zero.
	Handle fields straddling across alignment boundaries.
	Generalize handling of non-constant rhs.
	(fold_truth_andor): Leave sub-expression handling to the
	recursion above.
	* config/rs6000/t-rs6000 (toplev.o-warn): Disable errors.

for  gcc/testsuite/ChangeLog

	* gcc.dg/field-merge-1.c: New.
	* gcc.dg/field-merge-2.c: New.
	* gcc.dg/field-merge-3.c: New.
	* gcc.dg/field-merge-4.c: New.
	* gcc.dg/field-merge-5.c: New.
---
 gcc/config/rs6000/t-rs6000           |    4 
 gcc/fold-const.c                     |  818 ++++++++++++++++++++++++++++------
 gcc/testsuite/gcc.dg/field-merge-1.c |   64 +++
 gcc/testsuite/gcc.dg/field-merge-2.c |   31 +
 gcc/testsuite/gcc.dg/field-merge-3.c |   36 +
 gcc/testsuite/gcc.dg/field-merge-4.c |   40 ++
 gcc/testsuite/gcc.dg/field-merge-5.c |   40 ++
 7 files changed, 882 insertions(+), 151 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/field-merge-1.c
 create mode 100644 gcc/testsuite/gcc.dg/field-merge-2.c
 create mode 100644 gcc/testsuite/gcc.dg/field-merge-3.c
 create mode 100644 gcc/testsuite/gcc.dg/field-merge-4.c
 create mode 100644 gcc/testsuite/gcc.dg/field-merge-5.c

diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 1ddb572..516486d 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -52,6 +52,10 @@ $(srcdir)/config/rs6000/rs6000-tables.opt: $(srcdir)/config/rs6000/genopt.sh \
 	$(SHELL) $(srcdir)/config/rs6000/genopt.sh $(srcdir)/config/rs6000 > \
 		$(srcdir)/config/rs6000/rs6000-tables.opt
 
+# FRAME_GROWS_DOWNWARD tests flag_sanitize in a way that rules out a
+# test in toplev.c.
+toplev.o-warn = -Wno-error
+
 # The rs6000 backend doesn't cause warnings in these files.
 insn-conditions.o-warn =
 
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 0cc80ad..bfbf88c 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -4603,6 +4603,41 @@ optimize_bit_field_compare (location_t loc, enum tree_code code,
   return lhs;
 }
 

+/* If *R_ARG is a constant zero, and L_ARG is a possibly masked
+   BIT_XOR_EXPR, return 1 and set *R_ARG to L_ARG.
+   Otherwise, return 0.
+
+   The returned value should be passed to decode_field_reference for it
+   to handle L_ARG, and then doubled for R_ARG.  */
+static int
+prepare_xor (tree l_arg, tree *r_arg)
+{
+  int ret = 0;
+
+  if (!integer_zerop (*r_arg))
+    return ret;
+
+  tree exp = l_arg;
+  STRIP_NOPS (exp);
+
+  if (TREE_CODE (exp) == BIT_AND_EXPR)
+    {
+      tree and_mask = TREE_OPERAND (exp, 1);
+      exp = TREE_OPERAND (exp, 0);
+      STRIP_NOPS (exp); STRIP_NOPS (and_mask);
+      if (TREE_CODE (and_mask) != INTEGER_CST)
+	return ret;
+    }
+
+  if (TREE_CODE (exp) == BIT_XOR_EXPR)
+    {
+      *r_arg = l_arg;
+      return 1;
+    }
+
+  return ret;
+}
+
 /* Subroutine for fold_truth_andor_1: decode a field reference.
 
    If EXP is a comparison reference, we return the innermost reference.
@@ -4625,6 +4660,10 @@ optimize_bit_field_compare (location_t loc, enum tree_code code,
 
    *PAND_MASK is set to the mask found in a BIT_AND_EXPR, if any.
 
+   XOR_WHICH is 1 or 2 if EXP was found to be a (possibly masked)
+   BIT_XOR_EXPR compared with zero.  We're to take the first or second
+   operand thereof if so.  It should be zero otherwise.
+
    Return 0 if this is not a component reference or is one that we can't
    do anything with.  */
 
@@ -4632,7 +4671,7 @@ static tree
 decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 			HOST_WIDE_INT *pbitpos, machine_mode *pmode,
 			int *punsignedp, int *preversep, int *pvolatilep,
-			tree *pmask, tree *pand_mask)
+			tree *pmask, tree *pand_mask, int xor_which)
 {
   tree exp = *exp_;
   tree outer_type = 0;
@@ -4640,6 +4679,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   tree mask, inner, offset;
   tree unsigned_type;
   unsigned int precision;
+  HOST_WIDE_INT shiftrt = 0;
 
   /* All the optimizations using this function assume integer fields.
      There are problems with FP fields since the type_for_size call
@@ -4664,13 +4704,55 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 	return NULL_TREE;
     }
 
+  if (xor_which)
+    {
+      gcc_checking_assert (TREE_CODE (exp) == BIT_XOR_EXPR);
+      exp = TREE_OPERAND (exp, xor_which - 1);
+      STRIP_NOPS (exp);
+    }
+
+  if (CONVERT_EXPR_P (exp)
+      || TREE_CODE (exp) == NON_LVALUE_EXPR)
+    {
+      if (!outer_type)
+	outer_type = TREE_TYPE (exp);
+      exp = TREE_OPERAND (exp, 0);
+      STRIP_NOPS (exp);
+    }
+
+  if (TREE_CODE (exp) == RSHIFT_EXPR
+      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST
+      && tree_fits_shwi_p (TREE_OPERAND (exp, 1)))
+    {
+      tree shift = TREE_OPERAND (exp, 1);
+      STRIP_NOPS (shift);
+      shiftrt = tree_to_shwi (shift);
+      if (shiftrt > 0)
+	{
+	  exp = TREE_OPERAND (exp, 0);
+	  STRIP_NOPS (exp);
+	}
+      else
+	shiftrt = 0;
+    }
+
+  if (CONVERT_EXPR_P (exp)
+      || TREE_CODE (exp) == NON_LVALUE_EXPR)
+    {
+      if (!outer_type)
+	outer_type = TREE_TYPE (exp);
+      exp = TREE_OPERAND (exp, 0);
+      STRIP_NOPS (exp);
+    }
+
   poly_int64 poly_bitsize, poly_bitpos;
   inner = get_inner_reference (exp, &poly_bitsize, &poly_bitpos, &offset,
 			       pmode, punsignedp, preversep, pvolatilep);
+
   if ((inner == exp && and_mask == 0)
       || !poly_bitsize.is_constant (pbitsize)
       || !poly_bitpos.is_constant (pbitpos)
-      || *pbitsize < 0
+      || *pbitsize <= shiftrt
       || offset != 0
       || TREE_CODE (inner) == PLACEHOLDER_EXPR
       /* Reject out-of-bound accesses (PR79731).  */
@@ -4679,6 +4761,21 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
 			       *pbitpos + *pbitsize) < 0))
     return NULL_TREE;
 
+  if (shiftrt)
+    {
+      if (!*preversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
+	*pbitpos += shiftrt;
+      *pbitsize -= shiftrt;
+    }
+
+  if (outer_type && *pbitsize > TYPE_PRECISION (outer_type))
+    {
+      HOST_WIDE_INT excess = *pbitsize - TYPE_PRECISION (outer_type);
+      if (*preversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
+	*pbitpos += excess;
+      *pbitsize -= excess;
+    }
+
   unsigned_type = lang_hooks.types.type_for_size (*pbitsize, 1);
   if (unsigned_type == NULL_TREE)
     return NULL_TREE;
@@ -4709,27 +4806,6 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize,
   return inner;
 }
 
-/* Return nonzero if MASK represents a mask of SIZE ones in the low-order
-   bit positions and MASK is SIGNED.  */
-
-static bool
-all_ones_mask_p (const_tree mask, unsigned int size)
-{
-  tree type = TREE_TYPE (mask);
-  unsigned int precision = TYPE_PRECISION (type);
-
-  /* If this function returns true when the type of the mask is
-     UNSIGNED, then there will be errors.  In particular see
-     gcc.c-torture/execute/990326-1.c.  There does not appear to be
-     any documentation paper trail as to why this is so.  But the pre
-     wide-int worked with that restriction and it has been preserved
-     here.  */
-  if (size > precision || TYPE_SIGN (type) == UNSIGNED)
-    return false;
-
-  return wi::mask (size, false, precision) == wi::to_wide (mask);
-}
-
 /* Subroutine for fold: determine if VAL is the INTEGER_CONST that
    represents the sign bit of EXP's type.  If EXP represents a sign
    or zero extension, also test VAL against the unextended type.
@@ -6120,6 +6196,139 @@ merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
   return NULL_TREE;
 }
 
+/* Return the one bitpos within bit extents L or R that is at an
+   ALIGN-bit alignment boundary, or -1 if there is more than one such
+   boundary, if there isn't any, or if there is any such boundary
+   between the extents.  L and R are given by bitpos and bitsize.  If
+   it doesn't return -1, there are two consecutive ALIGN-bit words
+   that contain both extents, and at least one of the extents
+   straddles across the returned alignment boundary.  */
+static inline HOST_WIDE_INT
+compute_split_boundary_from_align (HOST_WIDE_INT align,
+				   HOST_WIDE_INT l_bitpos,
+				   HOST_WIDE_INT l_bitsize,
+				   HOST_WIDE_INT r_bitpos,
+				   HOST_WIDE_INT r_bitsize)
+{
+  HOST_WIDE_INT amask = ~(align - 1);
+
+  HOST_WIDE_INT first_bit = MIN (l_bitpos, r_bitpos);
+  HOST_WIDE_INT end_bit = MAX (l_bitpos + l_bitsize, r_bitpos + r_bitsize);
+
+  HOST_WIDE_INT boundary = (end_bit - 1) & amask;
+
+  /* Make sure we're crossing no more than one alignment boundary.
+
+     ??? We don't have logic to recombine loads of two adjacent
+     fields that each crosses a different alignment boundary, so
+     as to load the middle word only once, if other words can't be
+     otherwise recombined.  */
+  if (boundary - first_bit > align)
+    return -1;
+
+  HOST_WIDE_INT l_start_word = l_bitpos & amask;
+  HOST_WIDE_INT l_end_word = (l_bitpos + l_bitsize - 1) & amask;
+
+  HOST_WIDE_INT r_start_word = r_bitpos & amask;
+  HOST_WIDE_INT r_end_word = (r_bitpos + r_bitsize - 1) & amask;
+
+  /* If neither field straddles across an alignment boundary, it's no
+     use to even try to merge them.  */
+  if (l_start_word == l_end_word && r_start_word == r_end_word)
+    return -1;
+
+  return boundary;
+}
+
+/* Initialize ln_arg[0] and ln_arg[1] to a pair of newly-created (at
+   LOC) loads from INNER (from ORIG_INNER), of modes MODE and MODE2,
+   respectively, starting at BIT_POS, using reversed endianness if
+   REVERSEP.  Also initialize BITPOS (the starting position of each
+   part into INNER), BITSIZ (the bit count starting at BITPOS),
+   TOSHIFT[1] (the amount by which the part and its mask are to be
+   shifted right to bring its least-significant bit to bit zero) and
+   SHIFTED (the amount by which the part, by separate loading, has
+   already been shifted right, but that the mask needs shifting to
+   match).  */
+static inline void
+build_split_load (tree /* out */ ln_arg[2],
+		  HOST_WIDE_INT /* out */ bitpos[2],
+		  HOST_WIDE_INT /* out */ bitsiz[2],
+		  HOST_WIDE_INT /* in[0] out[0..1] */ toshift[2],
+		  HOST_WIDE_INT /* out */ shifted[2],
+		  location_t loc, tree inner, tree orig_inner,
+		  scalar_int_mode mode, scalar_int_mode mode2,
+		  HOST_WIDE_INT bit_pos, bool reversep)
+{
+  bitsiz[0] = GET_MODE_BITSIZE (mode);
+  bitsiz[1] = GET_MODE_BITSIZE (mode2);
+
+  for (int i = 0; i < 2; i++)
+    {
+      tree type = lang_hooks.types.type_for_size (bitsiz[i], 1);
+      bitpos[i] = bit_pos;
+      ln_arg[i] = make_bit_field_ref (loc, inner, orig_inner,
+				      type, bitsiz[i],
+				      bit_pos, 1, reversep);
+      bit_pos += bitsiz[i];
+    }
+
+  toshift[1] = toshift[0];
+  if (reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
+    {
+      shifted[0] = bitsiz[1];
+      shifted[1] = 0;
+      toshift[0] = 0;
+    }
+  else
+    {
+      shifted[1] = bitsiz[0];
+      shifted[0] = 0;
+      toshift[1] = 0;
+    }
+}
+
+/* Make arrangements to split at bit BOUNDARY a single loaded word
+   (with REVERSEP bit order) LN_ARG[0], to be shifted right by
+   TOSHIFT[0] to bring the field of interest to the least-significant
+   bit.  The expectation is that the same loaded word will be
+   propagated from part 0 to part 1, with just different shifting and
+   masking to extract both parts.  MASK is not expected to do more
+   than masking out the bits that belong to the other part.  See
+   build_split_load for more information on the other fields.  */
+static inline void
+reuse_split_load (tree /* in[0] out[1] */ ln_arg[2],
+		  HOST_WIDE_INT /* in[0] out[1] */ bitpos[2],
+		  HOST_WIDE_INT /* in[0] out[1] */ bitsiz[2],
+		  HOST_WIDE_INT /* in[0] out[0..1] */ toshift[2],
+		  HOST_WIDE_INT /* out */ shifted[2],
+		  tree /* out */ mask[2],
+		  HOST_WIDE_INT boundary, bool reversep)
+{
+  ln_arg[1] = ln_arg[0];
+  bitpos[1] = bitpos[0];
+  bitsiz[1] = bitsiz[0];
+  shifted[1] = shifted[0] = 0;
+
+  tree basemask = build_int_cst_type (TREE_TYPE (ln_arg[0]), -1);
+
+  if (reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
+    {
+      toshift[1] = toshift[0];
+      toshift[0] = bitpos[0] + bitsiz[0] - boundary;
+      mask[0] = const_binop (LSHIFT_EXPR, basemask,
+			     bitsize_int (toshift[0]));
+      mask[1] = const_binop (BIT_XOR_EXPR, basemask, mask[0]);
+    }
+  else
+    {
+      toshift[1] = boundary - bitpos[1];
+      mask[1] = const_binop (LSHIFT_EXPR, basemask,
+			     bitsize_int (toshift[1]));
+      mask[0] = const_binop (BIT_XOR_EXPR, basemask, mask[1]);
+    }
+}
+
 /* Find ways of folding logical expressions of LHS and RHS:
    Try to merge two comparisons to the same innermost item.
    Look for range tests like "ch >= '0' && ch <= '9'".
@@ -6142,11 +6351,19 @@ merge_truthop_with_opposite_arm (location_t loc, tree op, tree cmpop,
    TRUTH_TYPE is the type of the logical operand and LHS and RHS are its
    two operands.
 
+   SEPARATEP should be NULL if LHS and RHS are adjacent in
+   CODE-chained compares, and a non-NULL pointer to NULL_TREE
+   otherwise.  If the "words" accessed by RHS are already accessed by
+   LHS, this won't matter, but if RHS accesses "words" that LHS
+   doesn't, then *SEPARATEP will be set to the compares that should
+   take RHS's place.  By "words" we mean contiguous bits that do not
+   cross a TYPE_ALIGN boundary of the accessed object's type.
+
    We return the simplified tree or 0 if no optimization is possible.  */
 
 static tree
 fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
-		    tree lhs, tree rhs)
+		    tree lhs, tree rhs, tree *separatep)
 {
   /* If this is the "or" of two comparisons, we can do something if
      the comparisons are NE_EXPR.  If this is the "and", we can do something
@@ -6157,6 +6374,7 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
      convert EQ_EXPR to NE_EXPR so we need not reject the "wrong"
      comparison for one-bit fields.  */
 
+  enum tree_code orig_code = code;
   enum tree_code wanted_code;
   enum tree_code lcode, rcode;
   tree ll_arg, lr_arg, rl_arg, rr_arg;
@@ -6168,13 +6386,16 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   int ll_unsignedp, lr_unsignedp, rl_unsignedp, rr_unsignedp;
   int ll_reversep, lr_reversep, rl_reversep, rr_reversep;
   machine_mode ll_mode, lr_mode, rl_mode, rr_mode;
-  scalar_int_mode lnmode, rnmode;
+  scalar_int_mode lnmode, lnmode2, rnmode;
   tree ll_mask, lr_mask, rl_mask, rr_mask;
   tree ll_and_mask, lr_and_mask, rl_and_mask, rr_and_mask;
   tree l_const, r_const;
   tree lntype, rntype, result;
   HOST_WIDE_INT first_bit, end_bit;
   int volatilep;
+  bool l_split_load;
+
+  gcc_checking_assert (!separatep || !*separatep);
 
   /* Start by getting the comparison codes.  Fail if anything is volatile.
      If one operand is a BIT_AND_EXPR with the constant one, treat it as if
@@ -6202,7 +6423,116 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 
   if (TREE_CODE_CLASS (lcode) != tcc_comparison
       || TREE_CODE_CLASS (rcode) != tcc_comparison)
-    return 0;
+    {
+      tree separate = NULL;
+
+      /* Check for the possibility of merging component references.
+	 If any of our operands is another similar operation, recurse
+	 to try to merge individual operands, but avoiding double
+	 recursion: recurse to each leaf of LHS, and from there to
+	 each leaf of RHS, but don't bother recursing into LHS if RHS
+	 is neither a comparison nor a compound expr, nor into RHS if
+	 the LHS leaf isn't a comparison.  In case of no successful
+	 merging, recursion depth is limited to the sum of the depths
+	 of LHS and RHS, and the non-recursing code below will run no
+	 more times than the product of the leaf counts of LHS and
+	 RHS.  If there is a successful merge, we (recursively)
+	 further attempt to fold the result, so recursion depth and
+	 merge attempts are harder to compute.  */
+      if (TREE_CODE (lhs) == code && TREE_TYPE (lhs) == truth_type
+	  && (TREE_CODE_CLASS (rcode) == tcc_comparison
+	      || (TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)))
+	{
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					    TREE_OPERAND (lhs, 1), rhs,
+					    separatep
+					    ? separatep
+					    : NULL)) != 0)
+	    {
+	      /* We have combined the latter part of LHS with RHS.  If
+		 they were separate, the recursion already placed any
+		 remains of RHS in *SEPARATEP, otherwise they are all
+		 in RESULT, so we just have to prepend to result the
+		 former part of LHS.  */
+	      result = fold_build2_loc (loc, code, truth_type,
+					TREE_OPERAND (lhs, 0), result);
+	      return result;
+	    }
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					    TREE_OPERAND (lhs, 0), rhs,
+					    separatep
+					    ? separatep
+					    : &separate)) != 0)
+	    {
+	      /* We have combined the former part of LHS with RHS.  If
+		 they were separate, the recursive call will have
+		 placed remnants of RHS in *SEPARATEP.  If they
+		 weren't, they will be in SEPARATE instead.  Append
+		 the latter part of LHS to the result, and then any
+		 remnants of RHS that we haven't passed on to the
+		 caller.  */
+	      result = fold_build2_loc (loc, code, truth_type,
+					result, TREE_OPERAND (lhs, 1));
+	      if (separate)
+		result = fold_build2_loc (loc, code, truth_type,
+					  result, separate);
+	      return result;
+	    }
+	}
+      else if (TREE_CODE_CLASS (lcode) == tcc_comparison
+	       && TREE_CODE (rhs) == code && TREE_TYPE (rhs) == truth_type)
+	{
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					    lhs, TREE_OPERAND (rhs, 0),
+					    separatep
+					    ? &separate
+					    : NULL)) != 0)
+	    {
+	      /* We have combined LHS with the former part of RHS.  If
+		 they were separate, have any remnants of RHS placed
+		 in separate, so that we can combine them with the
+		 latter part of RHS, and then send them back for the
+		 caller to handle.  If they were adjacent, we can just
+		 append the latter part of RHS to the RESULT.  */
+	      if (!separate)
+		separate = TREE_OPERAND (rhs, 1);
+	      else
+		separate = fold_build2_loc (loc, code, truth_type,
+					    separate, TREE_OPERAND (rhs, 1));
+	      if (separatep)
+		*separatep = separate;
+	      else
+		result = fold_build2_loc (loc, code, truth_type,
+					  result, separate);
+	      return result;
+	    }
+	  if ((result = fold_truth_andor_1 (loc, code, truth_type,
+					    lhs, TREE_OPERAND (rhs, 1),
+					    &separate)) != 0)
+	    {
+	      /* We have combined LHS with the latter part of RHS.
+		 They're definitely not adjacent, so we get the
+		 remains of RHS in SEPARATE, and then prepend the
+		 former part of RHS to it.  If LHS and RHS were
+		 already separate to begin with, we leave the remnants
+		 of RHS for the caller to deal with, otherwise we
+		 append them to the RESULT.  */
+	      if (!separate)
+		separate = TREE_OPERAND (rhs, 0);
+	      else
+		separate = fold_build2_loc (loc, code, truth_type,
+					    TREE_OPERAND (rhs, 0), separate);
+	      if (separatep)
+		*separatep = separate;
+	      else
+		result = fold_build2_loc (loc, code, truth_type,
+					  result, separate);
+	      return result;
+	    }
+	}
+
+      return 0;
+    }
 
   ll_arg = TREE_OPERAND (lhs, 0);
   lr_arg = TREE_OPERAND (lhs, 1);
@@ -6278,22 +6608,24 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 
   ll_reversep = lr_reversep = rl_reversep = rr_reversep = 0;
   volatilep = 0;
+  int l_xor = prepare_xor (ll_arg, &lr_arg);
   ll_inner = decode_field_reference (loc, &ll_arg,
 				     &ll_bitsize, &ll_bitpos, &ll_mode,
 				     &ll_unsignedp, &ll_reversep, &volatilep,
-				     &ll_mask, &ll_and_mask);
+				     &ll_mask, &ll_and_mask, l_xor);
   lr_inner = decode_field_reference (loc, &lr_arg,
 				     &lr_bitsize, &lr_bitpos, &lr_mode,
 				     &lr_unsignedp, &lr_reversep, &volatilep,
-				     &lr_mask, &lr_and_mask);
+				     &lr_mask, &lr_and_mask, 2 * l_xor);
+  int r_xor = prepare_xor (rl_arg, &rr_arg);
   rl_inner = decode_field_reference (loc, &rl_arg,
 				     &rl_bitsize, &rl_bitpos, &rl_mode,
 				     &rl_unsignedp, &rl_reversep, &volatilep,
-				     &rl_mask, &rl_and_mask);
+				     &rl_mask, &rl_and_mask, r_xor);
   rr_inner = decode_field_reference (loc, &rr_arg,
 				     &rr_bitsize, &rr_bitpos, &rr_mode,
 				     &rr_unsignedp, &rr_reversep, &volatilep,
-				     &rr_mask, &rr_and_mask);
+				     &rr_mask, &rr_and_mask, 2 * r_xor);
 
   /* It must be true that the inner operation on the lhs of each
      comparison must be the same if we are to be able to do anything.
@@ -6349,6 +6681,72 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 	return 0;
     }
 
+  /* This will be bumped to 2 if any of the field pairs crosses an
+     alignment boundary, so the merged compare has to be done in two
+     parts.  */
+  int parts = 1;
+  /* Set to true if the second combined compare should come first,
+     e.g., because the second original compare accesses a word that
+     the first one doesn't, and the combined compares access those in
+     cmp[0].  */
+  bool first1 = false;
+  /* Set to true if the first original compare is not the one being
+     split.  */
+  bool maybe_separate = false;
+
+  /* The following 2-dimensional arrays use the first index to
+     identify left(0)- vs right(1)-hand compare operands, and the
+     second one to identify merged compare parts.  */
+  /* The memory loads or constants to be compared.  */
+  tree ld_arg[2][2];
+  /* The first bit of the corresponding inner object that the
+     corresponding LD_ARG covers.  */
+  HOST_WIDE_INT bitpos[2][2];
+  /* The bit count starting at BITPOS that the corresponding LD_ARG
+     covers.  */
+  HOST_WIDE_INT bitsiz[2][2];
+  /* The number of bits by which LD_ARG has already been shifted
+     right, WRT mask.  */
+  HOST_WIDE_INT shifted[2][2];
+  /* The number of bits by which both LD_ARG and MASK need shifting to
+     bring its least-significant bit to bit zero.  */
+  HOST_WIDE_INT toshift[2][2];
+  /* An additional mask to be applied to LD_ARG, to remove any bits
+     that may have been loaded for use in another compare, but that
+     don't belong in the corresponding compare.  */
+  tree xmask[2][2] = {};
+
+  /* The combined compare or compares.  */
+  tree cmp[2];
+
+  /* Consider we're comparing two non-contiguous fields of packed
+     structs, both aligned at 32-bit boundaries:
+
+     ll_arg: an 8-bit field at offset 0
+     lr_arg: a 16-bit field at offset 2
+
+     rl_arg: an 8-bit field at offset 1
+     rr_arg: a 16-bit field at offset 3
+
+     We'll have r_split_load, because rr_arg straddles across an
+     alignment boundary.
+
+     We'll want to have:
+
+     bitpos  = { {  0,  0 }, {  0, 32 } }
+     bitsiz  = { { 32, 32 }, { 32,  8 } }
+
+     And, for little-endian:
+
+     shifted = { {  0,  0 }, {  0, 32 } }
+     toshift = { {  0, 24 }, {  0,  0 } }
+
+     Or, for big-endian:
+
+     shifted = { {  0,  0 }, {  8,  0 } }
+     toshift = { {  8,  0 }, {  0,  0 } }
+  */
+
   /* See if we can find a mode that contains both fields being compared on
      the left.  If we can't, fail.  Otherwise, update all constants and masks
      to be relative to a field of that size.  */
@@ -6357,11 +6755,41 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
   if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
 		      TYPE_ALIGN (TREE_TYPE (ll_inner)), BITS_PER_WORD,
 		      volatilep, &lnmode))
-    return 0;
+    {
+      /* Consider the possibility of recombining loads if any of the
+	 fields straddles across an alignment boundary, so that either
+	 part can be loaded along with the other field.  */
+      HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (ll_inner));
+      HOST_WIDE_INT boundary = compute_split_boundary_from_align
+	(align, ll_bitpos, ll_bitsize, rl_bitpos, rl_bitsize);
+
+      if (boundary < 0
+	  || !get_best_mode (boundary - first_bit, first_bit, 0, 0,
+			     align, BITS_PER_WORD, volatilep, &lnmode)
+	  || !get_best_mode (end_bit - boundary, boundary, 0, 0,
+			     align, BITS_PER_WORD, volatilep, &lnmode2))
+	return 0;
+
+      l_split_load = true;
+      parts = 2;
+      if (ll_bitpos >= boundary)
+	maybe_separate = first1 = true;
+      else if (ll_bitpos + ll_bitsize <= boundary)
+	maybe_separate = true;
+    }
+  else
+    l_split_load = false;
 
   lnbitsize = GET_MODE_BITSIZE (lnmode);
   lnbitpos = first_bit & ~ (lnbitsize - 1);
+  if (l_split_load)
+    lnbitsize += GET_MODE_BITSIZE (lnmode2);
   lntype = lang_hooks.types.type_for_size (lnbitsize, 1);
+  if (!lntype)
+    {
+      gcc_checking_assert (l_split_load);
+      lntype = build_nonstandard_integer_type (lnbitsize, 1);
+    }
   xll_bitpos = ll_bitpos - lnbitpos, xrl_bitpos = rl_bitpos - lnbitpos;
 
   if (ll_reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
@@ -6414,19 +6842,58 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 	  || ll_reversep != lr_reversep
 	  /* Make sure the two fields on the right
 	     correspond to the left without being swapped.  */
-	  || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos)
+	  || ll_bitpos - rl_bitpos != lr_bitpos - rr_bitpos
+	  || lnbitpos < 0)
 	return 0;
 
+      bool r_split_load;
+      scalar_int_mode rnmode2;
+
       first_bit = MIN (lr_bitpos, rr_bitpos);
       end_bit = MAX (lr_bitpos + lr_bitsize, rr_bitpos + rr_bitsize);
       if (!get_best_mode (end_bit - first_bit, first_bit, 0, 0,
 			  TYPE_ALIGN (TREE_TYPE (lr_inner)), BITS_PER_WORD,
 			  volatilep, &rnmode))
-	return 0;
+	{
+	  /* Consider the possibility of recombining loads if any of the
+	     fields straddles across an alignment boundary, so that either
+	     part can be loaded along with the other field.  */
+	  HOST_WIDE_INT align = TYPE_ALIGN (TREE_TYPE (lr_inner));
+	  HOST_WIDE_INT boundary = compute_split_boundary_from_align
+	    (align, lr_bitpos, lr_bitsize, rr_bitpos, rr_bitsize);
+
+	  if (boundary < 0
+	      /* If we're to split both, make sure the split point is
+		 the same.  */
+	      || (l_split_load
+		  && (boundary - lr_bitpos
+		      != (lnbitpos + GET_MODE_BITSIZE (lnmode)) - ll_bitpos))
+	      || !get_best_mode (boundary - first_bit, first_bit, 0, 0,
+				 align, BITS_PER_WORD, volatilep, &rnmode)
+	      || !get_best_mode (end_bit - boundary, boundary, 0, 0,
+				 align, BITS_PER_WORD, volatilep, &rnmode2))
+	    return 0;
+
+	  r_split_load = true;
+	  parts = 2;
+	  if (lr_bitpos >= boundary)
+	    maybe_separate = first1 = true;
+	  else if (lr_bitpos + lr_bitsize <= boundary)
+	    maybe_separate = true;
+	}
+      else
+	r_split_load = false;
 
       rnbitsize = GET_MODE_BITSIZE (rnmode);
       rnbitpos = first_bit & ~ (rnbitsize - 1);
+      if (r_split_load)
+	rnbitsize += GET_MODE_BITSIZE (rnmode2);
       rntype = lang_hooks.types.type_for_size (rnbitsize, 1);
+      if (!rntype)
+	{
+	  gcc_checking_assert (r_split_load);
+	  rntype = build_nonstandard_integer_type (rnbitsize, 1);
+	}
       xlr_bitpos = lr_bitpos - rnbitpos, xrr_bitpos = rr_bitpos - rnbitpos;
 
       if (lr_reversep ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
@@ -6442,133 +6909,190 @@ fold_truth_andor_1 (location_t loc, enum tree_code code, tree truth_type,
 							    rntype, rr_mask),
 			     size_int (xrr_bitpos));
 
-      /* Make a mask that corresponds to both fields being compared.
-	 Do this for both items being compared.  If the operands are the
-	 same size and the bits being compared are in the same position
-	 then we can do this by masking both and comparing the masked
-	 results.  */
-      ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
       lr_mask = const_binop (BIT_IOR_EXPR, lr_mask, rr_mask);
-      if (lnbitsize == rnbitsize
-	  && xll_bitpos == xlr_bitpos
-	  && lnbitpos >= 0
-	  && rnbitpos >= 0)
-	{
-	  lhs = make_bit_field_ref (loc, ll_inner, ll_arg,
-				    lntype, lnbitsize, lnbitpos,
-				    ll_unsignedp || rl_unsignedp, ll_reversep);
-	  if (! all_ones_mask_p (ll_mask, lnbitsize))
-	    lhs = build2 (BIT_AND_EXPR, lntype, lhs, ll_mask);
-
-	  rhs = make_bit_field_ref (loc, lr_inner, lr_arg,
-				    rntype, rnbitsize, rnbitpos,
-				    lr_unsignedp || rr_unsignedp, lr_reversep);
-	  if (! all_ones_mask_p (lr_mask, rnbitsize))
-	    rhs = build2 (BIT_AND_EXPR, rntype, rhs, lr_mask);
-
-	  return build2_loc (loc, wanted_code, truth_type, lhs, rhs);
-	}
-
-      /* There is still another way we can do something:  If both pairs of
-	 fields being compared are adjacent, we may be able to make a wider
-	 field containing them both.
-
-	 Note that we still must mask the lhs/rhs expressions.  Furthermore,
-	 the mask must be shifted to account for the shift done by
-	 make_bit_field_ref.  */
-      if (((ll_bitsize + ll_bitpos == rl_bitpos
-	    && lr_bitsize + lr_bitpos == rr_bitpos)
-	   || (ll_bitpos == rl_bitpos + rl_bitsize
-	       && lr_bitpos == rr_bitpos + rr_bitsize))
-	  && ll_bitpos >= 0
-	  && rl_bitpos >= 0
-	  && lr_bitpos >= 0
-	  && rr_bitpos >= 0)
-	{
-	  tree type;
-
-	  lhs = make_bit_field_ref (loc, ll_inner, ll_arg, lntype,
-				    ll_bitsize + rl_bitsize,
-				    MIN (ll_bitpos, rl_bitpos),
-				    ll_unsignedp, ll_reversep);
-	  rhs = make_bit_field_ref (loc, lr_inner, lr_arg, rntype,
-				    lr_bitsize + rr_bitsize,
-				    MIN (lr_bitpos, rr_bitpos),
-				    lr_unsignedp, lr_reversep);
-
-	  ll_mask = const_binop (RSHIFT_EXPR, ll_mask,
-				 size_int (MIN (xll_bitpos, xrl_bitpos)));
-	  lr_mask = const_binop (RSHIFT_EXPR, lr_mask,
-				 size_int (MIN (xlr_bitpos, xrr_bitpos)));
-
-	  /* Convert to the smaller type before masking out unwanted bits.  */
-	  type = lntype;
-	  if (lntype != rntype)
+
+      toshift[1][0] = MIN (xlr_bitpos, xrr_bitpos);
+      shifted[1][0] = 0;
+
+      if (!r_split_load)
+	{
+	  bitpos[1][0] = rnbitpos;
+	  bitsiz[1][0] = rnbitsize;
+	  ld_arg[1][0] = make_bit_field_ref (loc, lr_inner, lr_arg,
+					     rntype, rnbitsize, rnbitpos,
+					     lr_unsignedp || rr_unsignedp,
+					     lr_reversep);
+	}
+
+      if (parts > 1)
+	{
+	  if (r_split_load)
+	    build_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+			      shifted[1], loc, lr_inner, lr_arg,
+			      rnmode, rnmode2, rnbitpos, lr_reversep);
+	  else
+	    reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+			      shifted[1], xmask[1],
+			      lnbitpos + GET_MODE_BITSIZE (lnmode)
+			      - ll_bitpos + lr_bitpos, lr_reversep);
+	}
+    }
+  else
+    {
+      /* Handle the case of comparisons with constants.  If there is
+	 something in common between the masks, those bits of the
+	 constants must be the same.  If not, the condition is always
+	 false.  Test for this to avoid generating incorrect code
+	 below.  */
+      result = const_binop (BIT_AND_EXPR, ll_mask, rl_mask);
+      if (! integer_zerop (result)
+	  && simple_cst_equal (const_binop (BIT_AND_EXPR,
+					    result, l_const),
+			       const_binop (BIT_AND_EXPR,
+					    result, r_const)) != 1)
+	{
+	  if (wanted_code == NE_EXPR)
 	    {
-	      if (lnbitsize > rnbitsize)
-		{
-		  lhs = fold_convert_loc (loc, rntype, lhs);
-		  ll_mask = fold_convert_loc (loc, rntype, ll_mask);
-		  type = rntype;
-		}
-	      else if (lnbitsize < rnbitsize)
-		{
-		  rhs = fold_convert_loc (loc, lntype, rhs);
-		  lr_mask = fold_convert_loc (loc, lntype, lr_mask);
-		  type = lntype;
-		}
+	      warning (0,
+		       "%<or%> of unmatched not-equal tests"
+		       " is always 1");
+	      return constant_boolean_node (true, truth_type);
 	    }
+	  else
+	    {
+	      warning (0,
+		       "%<and%> of mutually exclusive equal-tests"
+		       " is always 0");
+	      return constant_boolean_node (false, truth_type);
+	    }
+	}
 
-	  if (! all_ones_mask_p (ll_mask, ll_bitsize + rl_bitsize))
-	    lhs = build2 (BIT_AND_EXPR, type, lhs, ll_mask);
+      if (lnbitpos < 0)
+	return 0;
 
-	  if (! all_ones_mask_p (lr_mask, lr_bitsize + rr_bitsize))
-	    rhs = build2 (BIT_AND_EXPR, type, rhs, lr_mask);
+      /* The constants are combined so as to line up with the loaded
+	 field, so use the same parameters.  */
+      ld_arg[1][0] = const_binop (BIT_IOR_EXPR, l_const, r_const);
+      toshift[1][0] = MIN (xll_bitpos, xrl_bitpos);
+      shifted[1][0] = 0;
+      bitpos[1][0] = lnbitpos;
+      bitsiz[1][0] = lnbitsize;
 
-	  return build2_loc (loc, wanted_code, truth_type, lhs, rhs);
-	}
+      if (parts > 1)
+	reuse_split_load (ld_arg[1], bitpos[1], bitsiz[1], toshift[1],
+			  shifted[1], xmask[1],
+			  lnbitpos + GET_MODE_BITSIZE (lnmode),
+			  lr_reversep);
 
-      return 0;
+      lr_mask = build_int_cst_type (TREE_TYPE (ld_arg[1][0]), -1);
+
+      /* If the compiler thinks this is used uninitialized below, it's
+	 because it can't realize that parts can only be 2 when
+	 comparing with constants if l_split_load is also true.  This
+	 just silences the warning.  */
+      rnbitpos = 0;
     }
 
-  /* Handle the case of comparisons with constants.  If there is something in
-     common between the masks, those bits of the constants must be the same.
-     If not, the condition is always false.  Test for this to avoid generating
-     incorrect code below.  */
-  result = const_binop (BIT_AND_EXPR, ll_mask, rl_mask);
-  if (! integer_zerop (result)
-      && simple_cst_equal (const_binop (BIT_AND_EXPR, result, l_const),
-			   const_binop (BIT_AND_EXPR, result, r_const)) != 1)
+  ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
+  toshift[0][0] = MIN (xll_bitpos, xrl_bitpos);
+  shifted[0][0] = 0;
+
+  if (!l_split_load)
     {
-      if (wanted_code == NE_EXPR)
+      bitpos[0][0] = lnbitpos;
+      bitsiz[0][0] = lnbitsize;
+      ld_arg[0][0] = make_bit_field_ref (loc, ll_inner, ll_arg,
+					 lntype, lnbitsize, lnbitpos,
+					 ll_unsignedp || rl_unsignedp,
+					 ll_reversep);
+    }
+
+  if (parts > 1)
+    {
+      if (l_split_load)
+	build_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+			  shifted[0], loc, ll_inner, ll_arg,
+			  lnmode, lnmode2, lnbitpos, ll_reversep);
+      else
+	reuse_split_load (ld_arg[0], bitpos[0], bitsiz[0], toshift[0],
+			  shifted[0], xmask[0],
+			  rnbitpos + GET_MODE_BITSIZE (rnmode)
+			  - lr_bitpos + ll_bitpos, ll_reversep);
+    }
+
+  for (int i = 0; i < parts; i++)
+    {
+      tree op[2] = { ld_arg[0][i], ld_arg[1][i] };
+      tree mask[2] = { ll_mask, lr_mask };
+
+      for (int j = 0; j < 2; j++)
 	{
-	  warning (0, "%<or%> of unmatched not-equal tests is always 1");
-	  return constant_boolean_node (true, truth_type);
+	  /* Mask out the bits belonging to the other part.  */
+	  if (xmask[j][i])
+	    mask[j] = const_binop (BIT_AND_EXPR, mask[j], xmask[j][i]);
+
+	  if (shifted[j][i])
+	    {
+	      tree shiftsz = bitsize_int (shifted[j][i]);
+	      mask[j] = const_binop (RSHIFT_EXPR, mask[j], shiftsz);
+	    }
+	  mask[j] = fold_convert_loc (loc, TREE_TYPE (op[j]), mask[j]);
 	}
-      else
+
+      HOST_WIDE_INT shift = (toshift[0][i] - toshift[1][i]);
+
+      if (shift)
 	{
-	  warning (0, "%<and%> of mutually exclusive equal-tests is always 0");
-	  return constant_boolean_node (false, truth_type);
+	  int j;
+	  if (shift > 0)
+	    j = 0;
+	  else
+	    {
+	      j = 1;
+	      shift = -shift;
+	    }
+
+	  tree shiftsz = bitsize_int (shift);
+	  op[j] = fold_build2_loc (loc, RSHIFT_EXPR, TREE_TYPE (op[j]),
+				   op[j], shiftsz);
+	  mask[j] = const_binop (RSHIFT_EXPR, mask[j], shiftsz);
 	}
-    }
 
-  if (lnbitpos < 0)
-    return 0;
+      /* Convert to the smaller type before masking out unwanted
+	 bits.  */
+      tree type = TREE_TYPE (op[0]);
+      if (type != TREE_TYPE (op[1]))
+	{
+	  int j = (TYPE_PRECISION (type)
+		   < TYPE_PRECISION (TREE_TYPE (op[1])));
+	  if (!j)
+	    type = TREE_TYPE (op[1]);
+	  op[j] = fold_convert_loc (loc, type, op[j]);
+	  mask[j] = fold_convert_loc (loc, type, mask[j]);
+	}
 
-  /* Construct the expression we will return.  First get the component
-     reference we will make.  Unless the mask is all ones the width of
-     that field, perform the mask operation.  Then compare with the
-     merged constant.  */
-  result = make_bit_field_ref (loc, ll_inner, ll_arg,
-			       lntype, lnbitsize, lnbitpos,
-			       ll_unsignedp || rl_unsignedp, ll_reversep);
+      for (int j = 0; j < 2; j++)
+	if (! integer_all_onesp (mask[j]))
+	  op[j] = build2_loc (loc, BIT_AND_EXPR, type,
+			      op[j], mask[j]);
 
-  ll_mask = const_binop (BIT_IOR_EXPR, ll_mask, rl_mask);
-  if (! all_ones_mask_p (ll_mask, lnbitsize))
-    result = build2_loc (loc, BIT_AND_EXPR, lntype, result, ll_mask);
+      cmp[i] = build2_loc (loc, wanted_code, truth_type, op[0], op[1]);
+    }
+
+  if (first1)
+    std::swap (cmp[0], cmp[1]);
+
+  if (parts == 1)
+    result = cmp[0];
+  else if (!separatep || !maybe_separate)
+    result = build2_loc (loc, orig_code, truth_type, cmp[0], cmp[1]);
+  else
+    {
+      result = cmp[0];
+      *separatep = cmp[1];
+    }
 
-  return build2_loc (loc, wanted_code, truth_type, result,
-		     const_binop (BIT_IOR_EXPR, l_const, r_const));
+  return result;
 }
 

 /* T is an integer expression that is being multiplied, divided, or taken a
@@ -9166,15 +9690,7 @@ fold_truth_andor (location_t loc, enum tree_code code, tree type,
 	return fold_build2_loc (loc, code, type, arg0, tem);
     }
 
-  /* Check for the possibility of merging component references.  If our
-     lhs is another similar operation, try to merge its rhs with our
-     rhs.  Then try to merge our lhs and rhs.  */
-  if (TREE_CODE (arg0) == code
-      && (tem = fold_truth_andor_1 (loc, code, type,
-				    TREE_OPERAND (arg0, 1), arg1)) != 0)
-    return fold_build2_loc (loc, code, type, TREE_OPERAND (arg0, 0), tem);
-
-  if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1)) != 0)
+  if ((tem = fold_truth_andor_1 (loc, code, type, arg0, arg1, NULL)) != 0)
     return tem;
 
   bool logical_op_non_short_circuit = LOGICAL_OP_NON_SHORT_CIRCUIT;
diff --git a/gcc/testsuite/gcc.dg/field-merge-1.c b/gcc/testsuite/gcc.dg/field-merge-1.c
new file mode 100644
index 00000000..1818e104
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-1.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O -save-temps -fdump-tree-optimized" } */
+
+/* Check that field loads compared with constants are merged, even if
+   tested out of order, and when fields straddle across alignment
+   boundaries.  */
+
+struct TL {
+  unsigned char p;
+  unsigned int a;
+  unsigned char q;
+  unsigned int b;
+  unsigned char r;
+  unsigned int c;
+  unsigned char s;
+} __attribute__ ((packed, aligned (4), scalar_storage_order ("little-endian")));
+
+struct TB {
+  unsigned char p;
+  unsigned int a;
+  unsigned char q;
+  unsigned int b;
+  unsigned char r;
+  unsigned int c;
+  unsigned char s;
+} __attribute__ ((packed, aligned (4), scalar_storage_order ("big-endian")));
+
+#define vc 0xaa
+#define vi 0x12345678
+
+struct TL vL = { vc, vi, vc, vi, vc, vi, vc };
+struct TB vB = { vc, vi, vc, vi, vc, vi, vc };
+
+void f (void) {
+  /* Which words of    | vL | vB | */
+  /* are accessed by   |0123|0123| */
+  if (0 /* the tests?  |    |    | */
+      || vL.p != vc /* |*   |    | */
+      || vB.p != vc /* |    |*   | */
+      || vL.s != vc /* |   *|    | */
+      || vB.q != vc /* |    | *  | */
+      || vL.a != vi /* |^*  |    | */
+      || vB.b != vi /* |    | ^* | */
+      || vL.c != vi /* |  *^|    | */
+      || vB.c != vi /* |    |  ^*| */
+      || vL.b != vi /* | ^^ |    | */
+      || vL.q != vc /* | ^  |    | */
+      || vB.a != vi /* |    |^^  | */
+      || vB.r != vc /* |    |  ^ | */
+      || vB.s != vc /* |    |   ^| */
+      || vL.r != vc /* |  ^ |    | */
+      )
+    __builtin_abort ();
+}
+
+int main () {
+  f ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 8 "optimized" } } */
+/* { dg-final { scan-assembler-not "cmpb" { target { i*86-*-* || x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "cmpl" 8 { target { i*86-*-* || x86_64-*-* } } } } */
+/* { dg-final { scan-assembler-times "cmpw" 8 { target { powerpc*-*-* || rs6000-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/field-merge-2.c b/gcc/testsuite/gcc.dg/field-merge-2.c
new file mode 100644
index 00000000..80c573b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-2.c
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+/* { dg-options "-O" } */
+
+struct TL {
+  unsigned short a;
+  unsigned short b;
+} __attribute__ ((packed, aligned (8)));
+
+struct TB {
+  unsigned char p;
+  unsigned short a;
+  unsigned short b;
+} __attribute__ ((packed, aligned (8)));
+
+#define vc 0xaa
+
+struct TL vL = { vc, vc };
+struct TB vB = { vc, vc, vc };
+
+void f (void) {
+  if (0
+      || vL.b != vB.b
+      || vL.a != vB.a
+      )
+    __builtin_abort ();
+}
+
+int main () {
+  f ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/field-merge-3.c b/gcc/testsuite/gcc.dg/field-merge-3.c
new file mode 100644
index 00000000..8fdbb9a3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-3.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-options "-O" } */
+
+const int BIG_ENDIAN_P = (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__);
+
+struct T1 {
+  unsigned char p[2];
+  unsigned short a;
+  unsigned int z;
+} __attribute__((__aligned__(8)));
+
+struct T2 {
+  unsigned short p;
+  unsigned short a;
+  unsigned int z;
+} __attribute__((__aligned__(8)));
+
+#define vc 0xaa
+#define vi 0x12345678
+
+struct T1 v1 = { { vc + !BIG_ENDIAN_P, vc + BIG_ENDIAN_P }, vc, vi };
+struct T2 v2 = { (vc << 8) | (vc - 1), vc, vi };
+
+void f (void) {
+  if (0
+      || v1.p[!BIG_ENDIAN_P] != v2.p >> 8
+      || v1.a != v2.a
+      || ((v1.z ^ v2.z) & 0xff00ff00) != 0
+      || ((v1.z ^ v2.z) & 0x00ff00ff) != 0)
+    __builtin_abort ();
+}
+
+int main () {
+  f ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/field-merge-4.c b/gcc/testsuite/gcc.dg/field-merge-4.c
new file mode 100644
index 00000000..c629069
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-4.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-options "-O" } */
+
+struct T1 {
+  unsigned int zn;
+  unsigned char p;
+  unsigned char qn;
+  unsigned short a;
+  unsigned int z;
+} __attribute__((__packed__, __aligned__(4)));
+
+struct T2 {
+  unsigned int zn;
+  unsigned char rn;
+  unsigned char p;
+  unsigned char qn;
+  unsigned short a;
+  unsigned int z;
+} __attribute__((__packed__, __aligned__(4)));
+
+#define vc 0xaa
+#define vs 0xccdd
+#define vi 0x12345678
+
+struct T1 v1 = { -1, vc, 1, vs, vi };
+struct T2 v2 = { -1, 0, vc, 1, vs, vi };
+
+void f (void) {
+  if (0
+      || v1.p != v2.p
+      || v1.a != v2.a
+      || v1.z != v2.z
+      )
+    __builtin_abort ();
+}
+
+int main () {
+  f ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/field-merge-5.c b/gcc/testsuite/gcc.dg/field-merge-5.c
new file mode 100644
index 00000000..1580b14bc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/field-merge-5.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-options "-O" } */
+
+struct T1 {
+  unsigned int zn;
+  unsigned char p;
+  unsigned char qn;
+  unsigned short a;
+  unsigned int z;
+} __attribute__((__packed__, __aligned__(8)));
+
+struct T2 {
+  unsigned int zn;
+  unsigned char rn;
+  unsigned char p;
+  unsigned char qn;
+  unsigned short a;
+  unsigned int z;
+} __attribute__((__packed__, __aligned__(8)));
+
+#define vc 0xaa
+#define vs 0xccdd
+#define vi 0x12345678
+
+struct T1 v1 = { -1, vc, 1, vs, vi };
+struct T2 v2 = { -1, 0, vc, 1, vs, vi };
+
+void f (void) {
+  if (0
+      || v1.p != v2.p
+      || v1.a != v2.a
+      || v1.z != v2.z
+      )
+    __builtin_abort ();
+}
+
+int main () {
+  f ();
+  return 0;
+}


-- 
Alexandre Oliva, happy hacker
https://FSFLA.org/blogs/lxo/
Free Software Activist
GNU Toolchain Engineer

