This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Fix PR46556 (poor address generation)


OK, I've removed the pointer-arithmetic case from expand, to be handled
later by straight-line strength reduction.  Here's the patch to deal
with just the specific pattern of PR46556 (which will also eventually be
handled by strength reduction, but not as quickly).

(FYI, I've been thinking through the strength reduction pass, and my
plan is to stage in some of the easiest cases first, hopefully for 4.7,
and gradually add the more complex pieces.  Explicit multiplies in the
IL with known constants can be done pretty easily.  More complexity is
added when the multiplier is a variable, when conditional increments are
present, and when multiplies are hidden in addressing expressions.)

The present patch was bootstrapped and regression-tested on
powerpc64-linux.  OK for trunk?

Thanks,
Bill


2011-10-24  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

gcc:

	PR rtl-optimization/46556
	* expr.c (restructure_base_and_offset): New function.
	(expand_expr_real_1): Replace result of get_inner_reference
	with result of restructure_base_and_offset when applicable.
	* Makefile.in (expr.o): Update dependencies.

gcc/testsuite:

	PR rtl-optimization/46556
	* gcc.dg/tree-ssa-pr46556-1.c: New testcase.
	* gcc.dg/tree-ssa-pr46556-2.c: Likewise.
	* gcc.dg/tree-ssa-pr46556-3.c: Likewise.


Index: gcc/testsuite/gcc.dg/tree-ssa/pr46556-1.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/pr46556-1.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/pr46556-1.c	(revision 0)
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-expand" } */
+
+struct x
+{
+  int a[16];
+  int b[16];
+  int c[16];
+};
+
+extern void foo (int, int, int);
+
+void
+f (struct x *p, unsigned int n)
+{
+  foo (p->a[n], p->c[n], p->b[n]);
+}
+
+/* { dg-final { scan-rtl-dump-times "\\(mem/s:SI \\(plus:" 2 "expand" } } */
+/* { dg-final { scan-rtl-dump-times "const_int 128" 1 "expand" } } */
+/* { dg-final { scan-rtl-dump-times "const_int 64 \\\[0x40\\\]\\)\\) \\\[" 1 "expand" } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/pr46556-2.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/pr46556-2.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/pr46556-2.c	(revision 0)
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-expand" } */
+
+struct x
+{
+  int a[16];
+  int b[16];
+  int c[16];
+};
+
+extern void foo (int, int, int);
+
+void
+f (struct x *p, unsigned int n)
+{
+  foo (p->a[n], p->c[n], p->b[n]);
+  if (n > 12)
+    foo (p->a[n], p->c[n], p->b[n]);
+  else if (n > 3)
+    foo (p->b[n], p->a[n], p->c[n]);
+}
+
+/* { dg-final { scan-rtl-dump-times "\\(mem/s:SI \\(plus:" 6 "expand" } } */
+/* { dg-final { scan-rtl-dump-times "const_int 128" 3 "expand" } } */
+/* { dg-final { scan-rtl-dump-times "const_int 64 \\\[0x40\\\]\\)\\) \\\[" 3 "expand" } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/pr46556-3.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/pr46556-3.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/pr46556-3.c	(revision 0)
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-expand" } */
+struct x
+{
+  int a[16];
+  int b[16];
+  int c[16];
+};
+
+extern void foo (int, int, int);
+
+void
+f (struct x *p, unsigned int n)
+{
+  foo (p->a[n], p->c[n], p->b[n]);
+  if (n > 3)
+    {
+      foo (p->a[n], p->c[n], p->b[n]);
+      if (n > 12)
+	foo (p->b[n], p->a[n], p->c[n]);
+    }
+}
+
+/* { dg-final { scan-rtl-dump-times "\\(mem/s:SI \\(plus:" 6 "expand" } } */
+/* { dg-final { scan-rtl-dump-times "const_int 128" 3 "expand" } } */
+/* { dg-final { scan-rtl-dump-times "const_int 64 \\\[0x40\\\]\\)\\) \\\[" 3 "expand" } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
Index: gcc/expr.c
===================================================================
--- gcc/expr.c	(revision 180378)
+++ gcc/expr.c	(working copy)
@@ -56,6 +56,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "ssaexpand.h"
 #include "target-globals.h"
 #include "params.h"
+#include "tree-pretty-print.h"
 
 /* Decide whether a function's arguments should be processed
    from first to last or from last to first.
@@ -7648,7 +7649,66 @@ expand_constructor (tree exp, rtx target, enum exp
   return target;
 }
 
+/* Given BASE, OFFSET, and BITPOS derived from EXPR, determine whether
+   there is a profitable opportunity to restructure address arithmetic
+   within BASE and OFFSET.  If so, produce such a restructuring and
+   return it.  */
+/* TODO: This belongs more properly in a separate pass that performs
+   general strength reduction on straight-line code.  Eventually move
+   this there.  */
 
+static tree
+restructure_base_and_offset (tree expr, tree base, tree offset,
+			     HOST_WIDE_INT bitpos)
+{
+  tree mult_op0, mult_op1, t1, t2, offset_type, mult_expr, add_expr, mem_ref;
+  double_int c, c1, c2, c3, c4;
+
+  /* Look for the following pattern:
+
+       base   = MEM_REF (T1, C1);
+       offset = MULT_EXPR (PLUS_EXPR (T2, C2), C3)
+       bitpos = C4 * BITS_PER_UNIT
+
+     If found, convert it to:
+
+       MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
+                C1 + (C2 * C3) + C4)
+   */
+  if (!base
+      || !offset
+      || TREE_CODE (base) != MEM_REF
+      || TREE_CODE (offset) != MULT_EXPR)
+    return NULL_TREE;
+
+  mult_op0 = TREE_OPERAND (offset, 0);
+  mult_op1 = TREE_OPERAND (offset, 1);
+
+  if (TREE_CODE (mult_op0) != PLUS_EXPR
+      || TREE_CODE (mult_op1) != INTEGER_CST
+      || TREE_CODE (TREE_OPERAND (mult_op0, 1)) != INTEGER_CST)
+    return NULL_TREE;
+
+  t1 = TREE_OPERAND (base, 0);
+  t2 = TREE_OPERAND (mult_op0, 0);
+  c1 = mem_ref_offset (base);
+  c2 = tree_to_double_int (TREE_OPERAND (mult_op0, 1));
+  c3 = tree_to_double_int (mult_op1);
+  c4 = uhwi_to_double_int (bitpos / BITS_PER_UNIT);
+  c = double_int_add (double_int_add (c1, double_int_mul (c2, c3)), c4);
+
+  offset_type = TREE_TYPE (TREE_OPERAND (base, 1));
+
+  mult_expr = fold_build2 (MULT_EXPR, sizetype, t2, 
+			   double_int_to_tree (sizetype, c3));
+
+  add_expr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (t1), t1, mult_expr);
+
+  mem_ref = fold_build2 (MEM_REF, TREE_TYPE (expr), add_expr,
+			 double_int_to_tree (offset_type, c));
+  return mem_ref;
+}
+
 /* expand_expr: generate code for computing expression EXP.
    An rtx for the computed value is returned.  The value is never null.
    In the case of a void EXP, const0_rtx is returned.
@@ -9584,7 +9644,7 @@ expand_expr_real_1 (tree exp, rtx target, enum mac
       {
 	enum machine_mode mode1, mode2;
 	HOST_WIDE_INT bitsize, bitpos;
-	tree offset;
+	tree offset, tem2;
 	int volatilep = 0, must_force_mem;
 	bool packedp = false;
 	tree tem = get_inner_reference (exp, &bitsize, &bitpos, &offset,
@@ -9601,6 +9661,36 @@ expand_expr_real_1 (tree exp, rtx target, enum mac
 		&& DECL_PACKED (TREE_OPERAND (exp, 1))))
 	  packedp = true;
 
+	/* Attempt to restructure the base and offset to combine constants.
+	   Bitfield references should eventually be lowered prior to this
+	   point and are not dealt with.  Skip BLKmode aggregates as well.
+	   This generates dead code, so require -O2.  */
+	if (optimize > 1
+	    && code != BIT_FIELD_REF
+	    && (code != COMPONENT_REF 
+		|| !DECL_BIT_FIELD (TREE_OPERAND (exp, 1)))
+	    && TYPE_MODE (TREE_TYPE (exp)) != BLKmode
+	    && bitpos % BITS_PER_UNIT == 0
+	    && ((tem2 = restructure_base_and_offset (exp, tem, offset, bitpos))
+		!= NULL_TREE))
+	  {
+	    copy_ref_info (tem2, exp);
+	    tem = tem2;
+	    /* The offset and bitpos were absorbed into the new MEM_REF, so 
+	       make sure we don't add them in again.  */
+	    offset = NULL_TREE;
+	    bitpos = 0;
+
+	    if (dump_file && (dump_flags & TDF_DETAILS))
+	      {
+		fprintf (dump_file, "\nRestructured inner reference:\n");
+		fprintf (dump_file, "  Original reference: ");
+		print_generic_expr (dump_file, exp, TDF_VOPS|TDF_MEMSYMS);
+		fprintf (dump_file, "\n  Replacement: ");
+		print_generic_expr (dump_file, tem, TDF_VOPS|TDF_MEMSYMS);
+	      }
+	  }
+
 	/* If TEM's type is a union of variable size, pass TARGET to the inner
 	   computation, since it will need a temporary and TARGET is known
 	   to have to do.  This occurs in unchecked conversion in Ada.  */
Index: gcc/Makefile.in
===================================================================
--- gcc/Makefile.in	(revision 180378)
+++ gcc/Makefile.in	(working copy)
@@ -2926,7 +2926,7 @@ expr.o : expr.c $(CONFIG_H) $(SYSTEM_H) coretypes.
    reload.h langhooks.h intl.h $(TM_P_H) $(TARGET_H) \
    tree-iterator.h gt-expr.h $(MACHMODE_H) $(TIMEVAR_H) $(TREE_FLOW_H) \
    $(TREE_PASS_H) $(DF_H) $(DIAGNOSTIC_H) vecprim.h $(SSAEXPAND_H) \
-   $(PARAMS_H) $(COMMON_TARGET_H)
+   $(PARAMS_H) $(COMMON_TARGET_H) tree-pretty-print.h
 dojump.o : dojump.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE_H) \
    $(FLAGS_H) $(FUNCTION_H) $(EXPR_H) $(OPTABS_H) $(INSN_ATTR_H) insn-config.h \
    langhooks.h $(GGC_H) gt-dojump.h vecprim.h $(BASIC_BLOCK_H) output.h



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]