
[PATCH] Some middle-end improvements for bitfield handling


Hi!

This patch slightly improves += and -= operations on bit-fields.
On x86-64 in:

struct S { unsigned int i : 6, j : 11, k : 15; } b;
void plus2 (unsigned int x)
{
  b.j += x;
}
void plus3 (unsigned int x)
{
  b.k += x;
}

the first hunk removes an unneeded andl:
 plus2:
 	movl    b(%rip), %edx
 	movl    %edx, %eax
 	andl    $-131009, %edx
 	shrl    $6, %eax
-	andl    $2047, %eax
 	addl    %edi, %eax
 	andl    $2047, %eax
 	sall    $6, %eax
 	orl     %eax, %edx
 	movl    %edx, b(%rip)
 	ret
(the removed andl only clears bits that are masked out by the andl
after the addition anyway, and since carries propagate only upwards,
those bits cannot affect the low 11 bits of the sum).
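
The identity behind this can be sanity-checked outside the compiler;
here is a small standalone C program (not part of the patch) that
exhaustively exercises the rule for the b.j constants, with M == 2047
and an N that has all bits of M set:

/* For M == (1 << k) - 1 and (N & M) == M,
   ((a & N) + b) & M == (a + b) & M:
   "a & n" only clears bits above bit k-1, and since carries
   propagate upwards only, those bits cannot influence bits
   0..k-1 of the sum.  */
#include <assert.h>

int
main (void)
{
  unsigned int m = (1U << 11) - 1;	/* 2047, the b.j mask */
  unsigned int n = 0x1ffff;		/* (n & m) == m */
  unsigned int a, b;

  for (a = 0; a < 1U << 17; a += 131)
    for (b = 0; b < 1U << 17; b += 257)
      assert ((((a & n) + b) & m) == ((a + b) & m));
  return 0;
}
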
The second hunk is more important:
 plus3:
-	movzwl  b+2(%rip), %eax
-	movl    b(%rip), %edx
-	andl    $131071, %edx
-	shrw    %ax
-	movzwl  %ax, %eax
-	addl    %edi, %eax
-	sall    $17, %eax
-	orl     %eax, %edx
-	movl    %edx, b(%rip)
+	sall    $17, %edi
+	addl    %edi, b(%rip)
 	ret
I have failed to do this in the combiner, as it only handles 3
instructions at a time, while this and similar cases would need 6 or 7
instructions merged together and simplified, so I have modified the
expander to special-case this.
If a value is added to or subtracted from the topmost bit-field, there
is no need for any masking, as adding or subtracting the shifted-up
value cannot change any of the lower bits.  This is done in
expand_assignment, since anything below that point no longer sees the
original `to' tree and thus can't compare it with
TREE_OPERAND (from, 0).
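
As an illustration (again not part of the patch, and assuming the
little-endian bit-field layout used on x86-64, i.e. k in bits 17..31),
the whole-word add the expander now emits is equivalent to the
bit-field read-modify-write:

/* b.k += x done as a single add on the underlying word: x << 17
   contributes zeros to bits 0..16, so the lower fields see "+ 0"
   and no carry enters bit 17, while overflow out of bit 31 simply
   wraps, matching the field's modulo 2^15 semantics.  */
#include <assert.h>
#include <string.h>

struct S { unsigned int i : 6, j : 11, k : 15; };

int
main (void)
{
  struct S s = { 51, 636, 31278 };
  unsigned int word, x = 13279;

  memcpy (&word, &s, sizeof word);
  word += x << 17;		/* what plus3 now compiles to */
  memcpy (&s, &word, sizeof s);

  assert (s.i == 51 && s.j == 636);
  assert (s.k == ((31278 + 13279) & ((1 << 15) - 1)));
  return 0;
}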

The plus2 routine might be optimized even more to something like:
 plus2:
 	movl    b(%rip), %edx
 	movl    %edx, %eax
 	andl    $-131009, %edx
- 	shrl    $6, %eax
- 	addl    %edi, %eax
- 	andl    $2047, %eax
- 	sall    $6, %eax
+	sall	$6, %edi
+	addl	%edi, %eax
+	andl	$131008, %eax
 	orl     %eax, %edx
 	movl    %edx, b(%rip)
 	ret
but it wouldn't be a win in all cases (e.g. if insv/extv instructions
can be used, or if the shifted-up mask is more expensive to create than
the single shift plus masking).  Maybe expand_assignment could try to
emit both variants (one using store_bit_field, one without), compare
their rtx_costs and pick whichever is cheaper.
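
For what it's worth, the two expansions do compute the same field bits;
a quick standalone check (not part of the patch; 131008 == 2047 << 6):

/* "b.j += x" via shift-down/add/mask versus shift-up/add/shifted-mask.
   x << 6 contributes zeros to bits 0..5, so no carry enters bit 6,
   and any bits of x above bit 10 only disturb bits that the shifted
   mask clears anyway.  */
#include <assert.h>

int
main (void)
{
  unsigned int w, x;

  for (w = 0; w < 1U << 18; w += 37)
    for (x = 0; x < 1U << 12; x += 13)
      {
        unsigned int a = (((w >> 6) & 2047) + x) & 2047; /* current */
        unsigned int b = ((w + (x << 6)) & 131008) >> 6; /* proposed */
        assert (a == b);
      }
  return 0;
}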

I have tried the attached testcase on x86_64 and ppc{32,64} and in all cases
the patch improved the generated code.

2004-06-29  Jakub Jelinek  <jakub@redhat.com>

	* simplify-rtx.c (simplify_binary_operation): Simplify
	((A & N) + B) & M -> (A + B) & M if M is a pow2 minus 1 constant and
	N has at least all bits in M set as well.

	* expr.c (expand_assignment): Optimize += or -= on a bit field in
	most significant bits.

	* gcc.c-torture/execute/20040629-1.c: New test.

--- gcc/simplify-rtx.c.jj	2004-06-29 14:28:38.840389262 +0200
+++ gcc/simplify-rtx.c	2004-06-29 14:29:07.170336288 +0200
@@ -1894,6 +1894,52 @@ simplify_binary_operation (enum rtx_code
 	      && ! side_effects_p (op0)
 	      && GET_MODE_CLASS (mode) != MODE_CC)
 	    return const0_rtx;
+	  /* For constants M and N, if M == (1LL << cst) - 1 && (N & M) == M,
+	     ((A & N) + B) & M -> (A + B) & M
+	     Similarly if (N & M) == 0,
+	     ((A | N) + B) & M -> (A + B) & M
+	     and for - instead of + and/or ^ instead of |.  */
+	  if (GET_CODE (trueop1) == CONST_INT
+	      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
+	      && ~INTVAL (trueop1)
+	      && (INTVAL (trueop1) & (INTVAL (trueop1) + 1)) == 0
+	      && (GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS))
+	    {
+	      rtx pmop[2];
+	      int which;
+
+	      pmop[0] = XEXP (op0, 0);
+	      pmop[1] = XEXP (op0, 1);
+
+	      for (which = 0; which < 2; which++)
+		{
+		  tem = pmop[which];
+		  switch (GET_CODE (tem))
+		    {
+		    case AND:
+		      if (GET_CODE (XEXP (tem, 1)) == CONST_INT
+			  && (INTVAL (XEXP (tem, 1)) & INTVAL (trueop1))
+			     == INTVAL (trueop1))
+			pmop[which] = XEXP (tem, 0);
+		      break;
+		    case IOR:
+		    case XOR:
+		      if (GET_CODE (XEXP (tem, 1)) == CONST_INT
+			  && (INTVAL (XEXP (tem, 1)) & INTVAL (trueop1)) == 0)
+			pmop[which] = XEXP (tem, 0);
+		      break;
+		    default:
+		      break;
+		    }
+		}
+
+	      if (pmop[0] != XEXP (op0, 0) || pmop[1] != XEXP (op0, 1))
+		{
+		  tem = simplify_gen_binary (GET_CODE (op0), mode,
+					     pmop[0], pmop[1]);
+		  return simplify_gen_binary (code, mode, tem, op1);
+		}
+	    }
 	  tem = simplify_associative_operation (code, mode, op0, op1);
 	  if (tem)
 	    return tem;
--- gcc/expr.c.jj	2004-06-29 14:28:38.840389262 +0200
+++ gcc/expr.c	2004-06-29 14:47:01.995792399 +0200
@@ -3858,6 +3858,57 @@ expand_assignment (tree to, tree from, i
 	  MEM_KEEP_ALIAS_SET_P (to_rtx) = 1;
 	}
 
+      if (mode1 == VOIDmode && !want_value
+	  && bitpos + bitsize <= BITS_PER_WORD
+	  && bitsize < BITS_PER_WORD
+	  && GET_MODE_BITSIZE (GET_MODE (to_rtx)) <= BITS_PER_WORD
+	  && !TREE_SIDE_EFFECTS (to)
+	  && TREE_CODE (TREE_TYPE (from)) == INTEGER_TYPE
+	  && TREE_CODE_CLASS (TREE_CODE (from)) == '2'
+	  && operand_equal_p (to, TREE_OPERAND (from, 0), 0))
+	{
+	  rtx value;
+	  HOST_WIDE_INT count = bitpos;
+
+	  if (BYTES_BIG_ENDIAN)
+	    count = GET_MODE_BITSIZE (GET_MODE (to_rtx)) - bitpos - bitsize;
+
+	  /* Special case some bitfield op= exp.  */
+	  switch (TREE_CODE (from))
+	    {
+	    case PLUS_EXPR:
+	    case MINUS_EXPR:
+	      if (count <= 0)
+	        break;
+
+	      /* For now, just optimize the case of the topmost bitfield
+		 where we don't need to do any masking.
+		 We might win by one instruction for the other bitfields
+		 too if insv/extv instructions aren't used, so that
+		 can be added later.  */
+	      if (count + bitsize != GET_MODE_BITSIZE (GET_MODE (to_rtx)))
+		break;
+	      value = expand_expr (TREE_OPERAND (from, 1), NULL_RTX,
+				   VOIDmode, 0);
+	      value = protect_from_queue (value, 0);
+	      to_rtx = protect_from_queue (to_rtx, 1);
+	      value = expand_shift (LSHIFT_EXPR, GET_MODE (to_rtx),
+				    value, build_int_2 (count, 0),
+				    NULL_RTX, 1);
+	      result = expand_binop (GET_MODE (to_rtx),
+				     TREE_CODE (from) == PLUS_EXPR
+				     ? add_optab : sub_optab, to_rtx,
+				     value, to_rtx, 1, OPTAB_WIDEN);
+	      if (result != to_rtx)
+		emit_move_insn (to_rtx, result);
+	      free_temp_slots ();
+	      pop_temp_slots ();
+	      return NULL_RTX;
+	    default:
+	      break;
+	    }
+	}
+
       result = store_field (to_rtx, bitsize, bitpos, mode1, from,
 			    (want_value
 			     /* Spurious cast for HPUX compiler.  */
--- gcc/testsuite/gcc.c-torture/execute/20040629-1.c.jj	2004-06-29 15:02:23.111188157 +0200
+++ gcc/testsuite/gcc.c-torture/execute/20040629-1.c	2004-06-29 12:54:00.000000000 +0200
@@ -0,0 +1,131 @@
+#ifndef T
+
+extern void abort (void);
+extern void exit (int);
+
+struct S { unsigned int i : 6, j : 11, k : 15; } b;
+struct T { unsigned int i : 5, j : 1, k : 26; } c;
+struct U { unsigned int i : 16, j : 8, k : 8; } d;
+
+unsigned int ret1 (void) { return b.i; }
+unsigned int ret2 (void) { return b.j; }
+unsigned int ret3 (void) { return b.k; }
+unsigned int ret4 (void) { return c.i; }
+unsigned int ret5 (void) { return c.j; }
+unsigned int ret6 (void) { return c.k; }
+unsigned int ret7 (void) { return d.i; }
+unsigned int ret8 (void) { return d.j; }
+unsigned int ret9 (void) { return d.k; }
+
+#define T(n, pre, post, op) 					\
+void fn1_##n (unsigned int x) { pre b.i post; }			\
+void fn2_##n (unsigned int x) { pre b.j post; }			\
+void fn3_##n (unsigned int x) { pre b.k post; }			\
+void fn4_##n (unsigned int x) { pre c.i post; }			\
+void fn5_##n (unsigned int x) { pre c.j post; }			\
+void fn6_##n (unsigned int x) { pre c.k post; }			\
+void fn7_##n (unsigned int x) { pre d.i post; }			\
+void fn8_##n (unsigned int x) { pre d.j post; }			\
+void fn9_##n (unsigned int x) { pre d.k post; }
+
+#include "n.c"
+#undef T
+
+#define FAIL(n, i) abort ()
+
+int
+main (void)
+{
+#define T(n, pre, post, op)					\
+  b.i = 51;							\
+  b.j = 636;							\
+  b.k = 31278;							\
+  c.i = 21;							\
+  c.j = 1;							\
+  c.k = 33554432;						\
+  d.i = 26812;							\
+  d.j = 156;							\
+  d.k = 187;							\
+  fn1_##n (3);							\
+  if (ret1 () != (op (51, 3) & ((1 << 6) - 1)))			\
+    FAIL (n, 1);						\
+  b.i = 51;							\
+  fn2_##n (251);						\
+  if (ret2 () != (op (636, 251) & ((1 << 11) - 1)))		\
+    FAIL (n, 2);						\
+  b.j = 636;							\
+  fn3_##n (13279);						\
+  if (ret3 () != (op (31278, 13279) & ((1 << 15) - 1)))		\
+    FAIL (n, 3);						\
+  b.k = 31278;							\
+  fn4_##n (24);							\
+  if (ret4 () != (op (21, 24) & ((1 << 5) - 1)))		\
+    FAIL (n, 4);						\
+  c.i = 21;							\
+  fn5_##n (1);							\
+  if (ret5 () != (op (1, 1) & ((1 << 1) - 1)))			\
+    FAIL (n, 5);						\
+  c.j = 1;							\
+  fn6_##n (264151);						\
+  if (ret6 () != (op (33554432, 264151) & ((1 << 26) - 1)))	\
+    FAIL (n, 6);						\
+  c.k = 33554432;						\
+  fn7_##n (713);						\
+  if (ret7 () != (op (26812, 713) & ((1 << 16) - 1)))		\
+    FAIL (n, 7);						\
+  d.i = 26812;							\
+  fn8_##n (17);							\
+  if (ret8 () != (op (156, 17) & ((1 << 8) - 1)))		\
+    FAIL (n, 8);						\
+  d.j = 156;							\
+  fn9_##n (199);						\
+  if (ret9 () != (op (187, 199) & ((1 << 8) - 1)))		\
+    FAIL (n, 9);						\
+  d.k = 187;
+
+#include "n.c"
+#undef T
+  return 0;
+}
+
+#else
+
+#ifndef opadd
+#define opadd(x, y) (x + y)
+#define opsub(x, y) (x - y)
+#define opinc(x, y) (x + 1)
+#define opdec(x, y) (x - 1)
+#define opand(x, y) (x & y)
+#define opior(x, y) (x | y)
+#define opxor(x, y) (x ^ y)
+#define opdiv(x, y) (x / y)
+#define oprem(x, y) (x % y)
+#define opadd3(x, y) (x + 3)
+#define opsub7(x, y) (x - 7)
+#define opand21(x, y) (x & 21)
+#define opior19(x, y) (x | 19)
+#define opxor37(x, y) (x ^ 37)
+#define opdiv17(x, y) (x / 17)
+#define oprem19(x, y) (x % 19)
+#endif
+
+T(1, , += x, opadd)
+T(2, ++, , opinc)
+T(3, , ++, opinc)
+T(4, , -= x, opsub)
+T(5, --, , opdec)
+T(6, , --, opdec)
+T(7, , &= x, opand)
+T(8, , |= x, opior)
+T(9, , ^= x, opxor)
+T(a, , /= x, opdiv)
+T(b, , %= x, oprem)
+T(c, , += 3, opadd3)
+T(d, , -= 7, opsub7)
+T(e, , &= 21, opand21)
+T(f, , |= 19, opior19)
+T(g, , ^= 37, opxor37)
+T(h, , /= 17, opdiv17)
+T(i, , %= 19, oprem19)
+
+#endif

	Jakub

