Patch to fix bit-testing performance regression

Richard Sandiford rsandifo@redhat.com
Thu Mar 18 10:36:00 GMT 2004


This patch follows up the thread that started here:

    http://gcc.gnu.org/ml/gcc/2004-03/msg00560.html

To recap, the problem is that fold rewrites (X & (1 << C)) into
(X >> C) & 1, which means that:

    void f (unsigned int x)
    {
      if (x & 8) g ();
    }

is now implemented as:

        sra     $4,$4,3
        andi    $4,$4,0x1
        bne     $4,$0,$L4

on MIPS targets.  3.3 and earlier used the expected "andi $4,$4,0x8" instead.

Roger suggested various ways in which this could be fixed:

    http://gcc.gnu.org/ml/gcc/2004-03/msg00623.html

Since no-one commented on the relative merits of each suggestion,
I decided to pick the dojump one.

Specifically, the patch adds a new function, prefer_and_bit_test,
to test which of the two forms is better for conditional branches.
In order to distinguish conditions from normal sets, it uses rtx_cost
with an outer_code of IF_THEN_ELSE.  This distinction could be used
by back ends with parity branches, for example.

do_jump then looks for conditions of the form "(X >> C) & 1" and
rewrites them as "(X & (1 << C))" if the latter is as cheap.
This certainly seems to cure the problem for MIPS and shows a
significant performance improvement in some (proprietary) benchmarks.

Note that this patch doesn't address the problem of:

    y = (x & 8) != 0;

being implemented using shifts on targets with limited shift operators.
Not working on such a target, I don't really have time to look at it
myself.  But if someone does want to look into it, I hope that the
new prefer_and_bit_test function could be generalised to help.

Bootstrapped & regression tested on mips64{,el}-linux-gnu.  OK to install?

Richard



	* Makefile.in (dojump.o): Depend on $(GGC_H) and dojump.h.
	(GTFILES): Add $(srcdir)/dojump.h.
	(gt-dojump.h): New dependency.
	* dojump.c (and_reg, and_test, shift_test): New static variables.
	(prefer_and_bit_test): New function.
	(do_jump): Use it to choose between (X & (1 << C)) and (X >> C) & 1.

Index: dojump.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/dojump.c,v
retrieving revision 1.11
diff -c -p -F^\([(a-zA-Z0-9_]\|#define\) -r1.11 dojump.c
*** dojump.c	14 Mar 2004 22:26:04 -0000	1.11
--- dojump.c	17 Mar 2004 15:09:39 -0000
*************** 02111-1307, USA.  */
*** 33,39 ****
--- 33,41 ----
  #include "expr.h"
  #include "optabs.h"
  #include "langhooks.h"
+ #include "ggc.h"
  
+ static bool prefer_and_bit_test (enum machine_mode, int);
  static void do_jump_by_parts_greater (tree, int, rtx, rtx);
  static void do_jump_by_parts_equality (tree, rtx, rtx);
  static void do_compare_and_jump	(tree, enum rtx_code, enum rtx_code, rtx,
*************** jumpif (tree exp, rtx label)
*** 101,106 ****
--- 103,147 ----
    do_jump (exp, NULL_RTX, label);
  }
  
+ /* Used internally by prefer_and_bit_test.  */
+ 
+ static GTY(()) rtx and_reg;
+ static GTY(()) rtx and_test;
+ static GTY(()) rtx shift_test;
+ 
+ /* Compare the relative costs of "(X & (1 << BITNUM))" and "(X >> BITNUM) & 1",
+    where X is an arbitrary register of mode MODE.  Return true if the former
+    is preferred.  */
+ 
+ static bool
+ prefer_and_bit_test (enum machine_mode mode, int bitnum)
+ {
+   if (and_test == 0)
+     {
+       /* Set up rtxes for the two variations.  Use NULL as a placeholder
+ 	 for the BITNUM-based constants.  */
+       and_reg = gen_rtx_REG (mode, FIRST_PSEUDO_REGISTER);
+       and_test = gen_rtx_AND (mode, and_reg, NULL);
+       shift_test = gen_rtx_AND (mode, gen_rtx_ASHIFTRT (mode, and_reg, NULL),
+ 				const1_rtx);
+     }
+   else
+     {
+       /* Change the mode of the previously-created rtxes.  */
+       PUT_MODE (and_reg, mode);
+       PUT_MODE (and_test, mode);
+       PUT_MODE (shift_test, mode);
+       PUT_MODE (XEXP (shift_test, 0), mode);
+     }
+ 
+   /* Fill in the integers.  */
+   XEXP (and_test, 0) = GEN_INT ((unsigned HOST_WIDE_INT) 1 << bitnum);
+   XEXP (XEXP (shift_test, 0), 1) = GEN_INT (bitnum);
+ 
+   return (rtx_cost (and_test, IF_THEN_ELSE)
+ 	  <= rtx_cost (shift_test, IF_THEN_ELSE));
+ }
+ 
  /* Generate code to evaluate EXP and jump to IF_FALSE_LABEL if
     the result is zero, or IF_TRUE_LABEL if the result is one.
     Either of IF_FALSE_LABEL and IF_TRUE_LABEL may be zero,
*************** do_jump (tree exp, rtx if_false_label, r
*** 226,231 ****
--- 267,291 ----
            do_jump (convert (type, exp), if_false_label, if_true_label);
            break;
          }
+ 
+       /* fold_single_bit_test() converts (X & (1 << C)) into (X >> C) & 1.
+ 	 See if the former is preferred for jump tests and restore it
+ 	 if so.  */
+       if (TREE_CODE (TREE_OPERAND (exp, 0)) == RSHIFT_EXPR
+ 	  && integer_onep (TREE_OPERAND (exp, 1)))
+ 	{
+ 	  tree arg = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
+ 	  tree shift = TREE_OPERAND (TREE_OPERAND (exp, 0), 1);
+ 	  tree one = TREE_OPERAND (exp, 1);
+ 	  tree argtype = TREE_TYPE (arg);
+ 	  if (TREE_CODE (shift) == INTEGER_CST
+ 	      && compare_tree_int (shift, 0) > 0
+ 	      && compare_tree_int (shift, HOST_BITS_PER_WIDE_INT) < 0
+ 	      && prefer_and_bit_test (TYPE_MODE (argtype),
+ 				      TREE_INT_CST_LOW (shift)))
+ 	    exp = build (BIT_AND_EXPR, argtype, arg,
+ 			 fold (build (LSHIFT_EXPR, argtype, one, shift)));
+ 	}
        goto normal;
  
      case TRUTH_NOT_EXPR:
*************** do_compare_and_jump (tree exp, enum rtx_
*** 999,1001 ****
--- 1059,1063 ----
                              ? expr_size (TREE_OPERAND (exp, 0)) : NULL_RTX),
                             if_false_label, if_true_label);
  }
+ 
+ #include "gt-dojump.h"
Index: Makefile.in
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Makefile.in,v
retrieving revision 1.1262
diff -c -p -F^\([(a-zA-Z0-9_]\|#define\) -r1.1262 Makefile.in
*** Makefile.in	16 Mar 2004 21:09:22 -0000	1.1262
--- Makefile.in	17 Mar 2004 15:09:40 -0000
*************** expr.o : expr.c $(CONFIG_H) $(SYSTEM_H) 
*** 1597,1603 ****
     except.h reload.h $(GGC_H) langhooks.h intl.h $(TM_P_H) real.h $(TARGET_H)
  dojump.o : dojump.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE_H) \
     flags.h function.h $(EXPR_H) $(OPTABS_H) $(INSN_ATTR_H) insn-config.h \
!    langhooks.h
  builtins.o : builtins.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE_H)\
     flags.h $(TARGET_H) function.h $(REGS_H) $(EXPR_H) $(OPTABS_H) insn-config.h \
     $(RECOG_H) output.h typeclass.h hard-reg-set.h toplev.h hard-reg-set.h \
--- 1597,1603 ----
     except.h reload.h $(GGC_H) langhooks.h intl.h $(TM_P_H) real.h $(TARGET_H)
  dojump.o : dojump.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE_H) \
     flags.h function.h $(EXPR_H) $(OPTABS_H) $(INSN_ATTR_H) insn-config.h \
!    langhooks.h $(GGC_H) gt-dojump.h
  builtins.o : builtins.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE_H)\
     flags.h $(TARGET_H) function.h $(REGS_H) $(EXPR_H) $(OPTABS_H) insn-config.h \
     $(RECOG_H) output.h typeclass.h hard-reg-set.h toplev.h hard-reg-set.h \
*************** GTFILES = $(srcdir)/input.h $(srcdir)/co
*** 2086,2091 ****
--- 2086,2092 ----
    $(srcdir)/c-common.h $(srcdir)/c-tree.h \
    $(srcdir)/alias.c $(srcdir)/bitmap.c $(srcdir)/cselib.c $(srcdir)/cgraph.c \
    $(srcdir)/dbxout.c $(srcdir)/dwarf2out.c $(srcdir)/dwarf2asm.c \
+   $(srcdir)/dojump.c \
    $(srcdir)/emit-rtl.c $(srcdir)/except.c $(srcdir)/explow.c $(srcdir)/expr.c \
    $(srcdir)/fold-const.c $(srcdir)/function.c \
    $(srcdir)/gcse.c $(srcdir)/integrate.c $(srcdir)/lists.c $(srcdir)/optabs.c \
*************** gt-cgraph.h gt-coverage.h gtype-desc.h g
*** 2105,2111 ****
  gt-function.h gt-integrate.h gt-stmt.h gt-tree.h gt-varasm.h \
  gt-emit-rtl.h gt-explow.h gt-stor-layout.h gt-regclass.h \
  gt-lists.h gt-alias.h gt-cselib.h gt-fold-const.h gt-gcse.h \
! gt-expr.h gt-sdbout.h gt-optabs.h gt-bitmap.h \
  gt-dwarf2out.h gt-ra-build.h gt-reg-stack.h gt-dwarf2asm.h \
  gt-dbxout.h gt-c-common.h gt-c-decl.h gt-c-parse.h \
  gt-c-pragma.h gtype-c.h gt-input.h gt-cfglayout.h \
--- 2106,2112 ----
  gt-function.h gt-integrate.h gt-stmt.h gt-tree.h gt-varasm.h \
  gt-emit-rtl.h gt-explow.h gt-stor-layout.h gt-regclass.h \
  gt-lists.h gt-alias.h gt-cselib.h gt-fold-const.h gt-gcse.h \
! gt-expr.h gt-sdbout.h gt-optabs.h gt-bitmap.h gt-dojump.h \
  gt-dwarf2out.h gt-ra-build.h gt-reg-stack.h gt-dwarf2asm.h \
  gt-dbxout.h gt-c-common.h gt-c-decl.h gt-c-parse.h \
  gt-c-pragma.h gtype-c.h gt-input.h gt-cfglayout.h \



More information about the Gcc-patches mailing list