This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
[PATCH] teach emit_store_flag to use clz/ctz

From: Paolo Bonzini <bonzini at gnu dot org>
To: gcc-patches at gcc dot gnu dot org
Cc: rguenther at suse dot de, amodra at gmail dot com
Date: Fri, 27 Apr 2012 11:48:49 +0200
Subject: [PATCH] teach emit_store_flag to use clz/ctz
If the value at zero is outside the range [0, GET_MODE_BITSIZE (mode)),
"A != 0" and "A == 0" can be compiled to clz/ctz followed by a subtraction
or one's complement (only for A != 0) and a shift.  This trick can be
used effectively on PPC (though there are other problems in the machine
description to fix first).

Bootstrapped/regtested x86_64-pc-linux-gnu, and tested manually on PPC
with some changes to the machine description.  Ok for mainline?

Paolo

2012-04-27  Paolo Bonzini  <bonzini@gnu.org>

	PR 53087
	* expmed.c (emit_store_flag): Generalize to support cases
	where the result is not in the sign bit.  Add a trick
	involving the value at zero of clz or ctz.

Index: expmed.c
===================================================================
--- expmed.c	(revisione 186859)
+++ expmed.c	(copia locale)
@@ -5419,6 +5419,7 @@
   enum rtx_code rcode;
   rtx subtarget;
   rtx tem, last, trueval;
+  int shift;
 
   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
 			   target_mode);
@@ -5612,10 +5613,11 @@
 			   false) <= 1 || (code != LE && code != GT))))
     return 0;
 
-  /* Try to put the result of the comparison in the sign bit.  Assume we can't
-     do the necessary operation below.  */
+  /* Assume we can't do the necessary operation below, so try to put the
+     result of the comparison in a known bit, given by SHIFT.  */
 
   tem = 0;
+  shift = GET_MODE_BITSIZE (mode) - 1;
 
   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
      the sign bit set.  */
@@ -5650,25 +5652,22 @@
 
   if (code == EQ || code == NE)
     {
-      /* For EQ or NE, one way to do the comparison is to apply an operation
-	 that converts the operand into a positive number if it is nonzero
-	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
-	 for NE we negate.  This puts the result in the sign bit.  Then we
-	 normalize with a shift, if needed.
+      /* Look for an operation that converts the operand into a positive
+	 number if it is nonzero or zero if it was originally zero.  Then,
+	 for EQ, we subtract 1 and for NE we negate.  This puts the result
+	 in the sign bit.  Then we normalize with a shift, if needed.
 
-	 Two operations that can do the above actions are ABS and FFS, so try
-	 them.  If that doesn't work, and MODE is smaller than a full word,
-	 we can use zero-extension to the wider mode (an unsigned conversion)
-	 as the operation.  */
-
-      /* Note that ABS doesn't yield a positive number for INT_MIN, but
+	 ABS can do this; it doesn't yield a positive number for INT_MIN, but
 	 that is compensated by the subsequent overflow when subtracting
-	 one / negating.  */
+	 one / negating.  If that doesn't work, and MODE is smaller than
+	 a full word, we can use zero-extension to the wider mode (an
+	 unsigned conversion) as the operation.
 
+	 FFS could also do this, but we use a more versatile trick with
+	 CLZ/CTZ below.  */
+
       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
 	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
-      else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
-	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
 	{
 	  tem = convert_modes (word_mode, mode, op0, 1);
@@ -5684,6 +5683,69 @@
 	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
 	}
 
+      if (tem == 0)
+        {
+          int clz_zero, ctz_zero, if_zero;
+
+          /* For EQ or NE, clz and ctz may give a unique value for zero that is
+             not in the [0, GET_MODE_BITSIZE (mode)) range.  The result can be
+             massaged to put the result in a known bit.  */
+
+          if (!CLZ_DEFINED_VALUE_AT_ZERO (mode, clz_zero))
+            clz_zero = 0;
+          if (!CTZ_DEFINED_VALUE_AT_ZERO (mode, ctz_zero))
+            ctz_zero = 0;
+
+          /* Prefer a negative result if normalize == -1, since that does not
+             require an additional left shift.  */
+
+          if (normalizep == -1)
+            {
+              if (ctz_zero < 0 && clz_zero >= GET_MODE_BITSIZE (mode))
+                clz_zero = 0;
+              if (clz_zero < 0 && ctz_zero >= GET_MODE_BITSIZE (mode))
+                ctz_zero = 0;
+            }
+
+          if ((clz_zero < 0 || clz_zero >= GET_MODE_BITSIZE (mode))
+              && optab_handler (clz_optab, mode) != CODE_FOR_nothing)
+            {
+              tem = expand_unop (mode, clz_optab, op0, subtarget, 1);
+              if_zero = clz_zero;
+            }
+          if (tem == 0
+              && (ctz_zero < 0 || ctz_zero >= GET_MODE_BITSIZE (mode))
+              && optab_handler (ctz_optab, mode) != CODE_FOR_nothing)
+            {
+              tem = expand_unop (mode, ctz_optab, op0, subtarget, 1);
+              if_zero = ctz_zero;
+            }
+
+          if (tem != 0)
+            {
+              if (code == NE)
+                {
+                  /* Place the result in the sign bit.  For negative value at
+                     zero, compute one's complement; for positive value at
+                     zero, subtract the mode size.  */
+                  if (if_zero > 0)
+                    tem = expand_binop (mode, sub_optab, tem,
+                                        GEN_INT (GET_MODE_BITSIZE (mode)),
+                                        subtarget, 0, OPTAB_WIDEN);
+                  else
+                    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
+                }
+              else
+                {
+                  /* The result is already in some bit, but it may not be the
+                     sign bit.  */
+                  if (if_zero > 0)
+                    shift = floor_log2 (if_zero);
+                }
+              goto result_in_known_bit;
+            }
+        }
+
       /* If we couldn't do it that way, for NE we can "or" the two's complement
 	 of the value with itself.  For EQ, we take the one's complement of
 	 that "or", which is an extra insn, so we only handle EQ if branches
@@ -5706,10 +5768,17 @@
 	}
     }
 
-  if (tem && normalizep)
-    tem = expand_shift (RSHIFT_EXPR, mode, tem,
-			GET_MODE_BITSIZE (mode) - 1,
-			subtarget, normalizep == 1);
+result_in_known_bit:
+  if (tem && normalizep == -1 && shift < GET_MODE_BITSIZE (mode) - 1)
+    {
+      tem = expand_shift (LSHIFT_EXPR, mode, tem,
+                          GET_MODE_BITSIZE (mode) - shift - 1,
+                          subtarget, normalizep == 1);
+      shift = GET_MODE_BITSIZE (mode) - 1;
+    }
+  if (tem)
+    tem = expand_shift (RSHIFT_EXPR, mode, tem, shift,
+                        subtarget, normalizep == 1);
 
   if (tem)
     {
Index: config/rs6000/rs6000.md
===================================================================
--- config/rs6000/rs6000.md	(revisione 186859)
+++ config/rs6000/rs6000.md	(copia locale)
@@ -2129,7 +2129,7 @@
 (define_expand "abssi2"
   [(set (match_operand:SI 0 "gpc_reg_operand" "")
 	(abs:SI (match_operand:SI 1 "gpc_reg_operand" "")))]
-  ""
+  "TARGET_ISEL || TARGET_POWER"
   "
 {
   if (TARGET_ISEL)
@@ -2137,11 +2137,6 @@
       emit_insn (gen_abssi2_isel (operands[0], operands[1]));
       DONE;
     }
-  else if (! TARGET_POWER)
-    {
-      emit_insn (gen_abssi2_nopower (operands[0], operands[1]));
-      DONE;
-    }
 }")
 
 (define_insn "*abssi2_power"
@@ -2188,36 +2183,12 @@
 			  (match_dup 2)))]
   "")
 
-(define_insn_and_split "abssi2_nopower"
-  [(set (match_operand:SI 0 "gpc_reg_operand" "=&r,r")
-        (abs:SI (match_operand:SI 1 "gpc_reg_operand" "r,0")))
-   (clobber (match_scratch:SI 2 "=&r,&r"))]
-  "! TARGET_POWER && ! TARGET_ISEL"
-  "#"
-  "&& reload_completed"
-  [(set (match_dup 2) (ashiftrt:SI (match_dup 1) (const_int 31)))
-   (set (match_dup 0) (xor:SI (match_dup 2) (match_dup 1)))
-   (set (match_dup 0) (minus:SI (match_dup 0) (match_dup 2)))]
-  "")
-
 (define_insn "*nabs_power"
   [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
 	(neg:SI (abs:SI (match_operand:SI 1 "gpc_reg_operand" "r"))))]
   "TARGET_POWER"
   "nabs %0,%1")
 
-(define_insn_and_split "*nabs_nopower"
-  [(set (match_operand:SI 0 "gpc_reg_operand" "=&r,r")
-        (neg:SI (abs:SI (match_operand:SI 1 "gpc_reg_operand" "r,0"))))
-   (clobber (match_scratch:SI 2 "=&r,&r"))]
-  "! TARGET_POWER"
-  "#"
-  "&& reload_completed"
-  [(set (match_dup 2) (ashiftrt:SI (match_dup 1) (const_int 31)))
-   (set (match_dup 0) (xor:SI (match_dup 2) (match_dup 1)))
-   (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 0)))]
-  "")
-
 (define_expand "neg<mode>2"
   [(set (match_operand:SDI 0 "gpc_reg_operand" "")
 	(neg:SDI (match_operand:SDI 1 "gpc_reg_operand" "")))]
@@ -7710,40 +7681,13 @@
 (define_expand "absdi2"
   [(set (match_operand:DI 0 "gpc_reg_operand" "")
 	(abs:DI (match_operand:DI 1 "gpc_reg_operand" "")))]
-  "TARGET_POWERPC64"
+  "TARGET_POWERPC64 && TARGET_ISEL"
   "
 {
-  if (TARGET_ISEL)
-    emit_insn (gen_absdi2_isel (operands[0], operands[1]));
-  else
-    emit_insn (gen_absdi2_internal (operands[0], operands[1]));
+  emit_insn (gen_absdi2_isel (operands[0], operands[1]));
   DONE;
 }")
 
-(define_insn_and_split "absdi2_internal"
-  [(set (match_operand:DI 0 "gpc_reg_operand" "=&r,r")
-        (abs:DI (match_operand:DI 1 "gpc_reg_operand" "r,0")))
-   (clobber (match_scratch:DI 2 "=&r,&r"))]
-  "TARGET_POWERPC64 && !TARGET_ISEL"
-  "#"
-  "&& reload_completed"
-  [(set (match_dup 2) (ashiftrt:DI (match_dup 1) (const_int 63)))
-   (set (match_dup 0) (xor:DI (match_dup 2) (match_dup 1)))
-   (set (match_dup 0) (minus:DI (match_dup 0) (match_dup 2)))]
-  "")
-
-(define_insn_and_split "*nabsdi2"
-  [(set (match_operand:DI 0 "gpc_reg_operand" "=&r,r")
-        (neg:DI (abs:DI (match_operand:DI 1 "gpc_reg_operand" "r,0"))))
-   (clobber (match_scratch:DI 2 "=&r,&r"))]
-  "TARGET_POWERPC64 && !TARGET_ISEL"
-  "#"
-  "&& reload_completed"
-  [(set (match_dup 2) (ashiftrt:DI (match_dup 1) (const_int 63)))
-   (set (match_dup 0) (xor:DI (match_dup 2) (match_dup 1)))
-   (set (match_dup 0) (minus:DI (match_dup 2) (match_dup 0)))]
-  "")
-
 (define_insn "muldi3"
   [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
         (mult:DI (match_operand:DI 1 "gpc_reg_operand" "%r,r")
@@ -13100,13 +13044,9 @@
 				    operands[2], operands[3]);
     }
 
-  /* For SNE, we would prefer that the xor/abs sequence be used for integers.
-     For SEQ, likewise, except that comparisons with zero should be done
-     with an scc insns.  However, due to the order that combine see the
-     resulting insns, we must, in fact, allow SEQ for integers.  Fail in
-     the cases we don't want to handle or are best handled by portable
-     code.  */
-  if (GET_CODE (operands[1]) == NE)
+  /* Fail in the cases we don't want to handle or are best handled by
+     portable code.  */
+  if (GET_CODE (operands[1]) == NE || GET_CODE (operands[1]) == EQ)
     FAIL;
   if ((GET_CODE (operands[1]) == LT || GET_CODE (operands[1]) == LE
        || GET_CODE (operands[1]) == GT || GET_CODE (operands[1]) == GE)
Follow-Ups:
- Re: [PATCH] teach emit_store_flag to use clz/ctz
  - From: Paolo Bonzini
- Re: [PATCH] teach emit_store_flag to use clz/ctz
  - From: Richard Guenther
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]