[PATCH] rs6000: clz/ctz/ffs improvement (PR78683)

Segher Boessenkool segher@kernel.crashing.org
Fri Dec 9 10:37:00 GMT 2016


On CPUs that implement popcnt[wd] but not cnttz[wd] we can do better for
the ctz sequences than we do today.

C[LT]Z_DEFINED_VALUE_AT_ZERO can return 2, since we always return the
same fixed value (only dependent on TARGET_* options).

I originally tried to have the generic code handle this; that would be
too much surgery for stage 3 though.

Bootstrapped and tested on powerpc64-linux {-m32,-m64}; also tested
manually with {-m32,-m64} -mcpu=power{4,7,9}.  Is this okay for trunk?


Segher


2016-12-09  Segher Boessenkool  <segher@kernel.crashing.org>

	PR target/78683
	* config/rs6000/rs6000.h (CLZ_DEFINED_VALUE_AT_ZERO):
	Use GET_MODE_BITSIZE.  Return 2.
	(CTZ_DEFINED_VALUE_AT_ZERO): Use GET_MODE_BITSIZE.  Return 2.  Handle
	TARGET_POPCNTD the same as TARGET_CTZ.
	* config/rs6000/rs6000.md (ctz<mode>2): Reimplement.
	(ffs<mode>2): Reimplement.

---
 gcc/config/rs6000/rs6000.h  | 11 ++++----
 gcc/config/rs6000/rs6000.md | 62 +++++++++++++++++++++++----------------------
 2 files changed, 38 insertions(+), 35 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 5d56927..fe314bf 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -2199,14 +2199,15 @@ do {									     \
 
 /* The cntlzw and cntlzd instructions return 32 and 64 for input of zero.  */
 #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
-  ((VALUE) = ((MODE) == SImode ? 32 : 64), 1)
+  ((VALUE) = GET_MODE_BITSIZE (MODE), 2)
 
 /* The CTZ patterns that are implemented in terms of CLZ return -1 for input of
-   zero.  The hardware instructions added in Power9 return 32 or 64.  */
+   zero.  The hardware instructions added in Power9 and the sequences using
+   popcount return 32 or 64.  */
 #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE)				\
-  ((!TARGET_CTZ)							\
-   ? ((VALUE) = -1, 1)							\
-   : ((VALUE) = ((MODE) == SImode ? 32 : 64), 1))
+  (TARGET_CTZ || TARGET_POPCNTD						\
+   ? ((VALUE) = GET_MODE_BITSIZE (MODE), 2)				\
+   : ((VALUE) = -1, 2))
 
 /* Specify the machine mode that pointers have.
    After generation of rtl, the compiler makes no further distinction
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4726d73..777b996 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -2220,17 +2220,8 @@ (define_insn "clz<mode>2"
   [(set_attr "type" "cntlz")])
 
 (define_expand "ctz<mode>2"
-  [(set (match_dup 2)
-	(neg:GPR (match_operand:GPR 1 "gpc_reg_operand" "")))
-   (set (match_dup 3)
-	(and:GPR (match_dup 1)
-		 (match_dup 2)))
-   (set (match_dup 4)
-	(clz:GPR (match_dup 3)))
-   (parallel [(set (match_operand:GPR 0 "gpc_reg_operand" "")
-		   (minus:GPR (match_dup 5)
-			      (match_dup 4)))
-	      (clobber (reg:GPR CA_REGNO))])]
+   [(set (match_operand:GPR 0 "gpc_reg_operand")
+	 (ctz:GPR (match_operand:GPR 1 "gpc_reg_operand")))]
   ""
 {
   if (TARGET_CTZ)
@@ -2239,10 +2230,26 @@ (define_expand "ctz<mode>2"
       DONE;
     }
 
-  operands[2] = gen_reg_rtx (<MODE>mode);
-  operands[3] = gen_reg_rtx (<MODE>mode);
-  operands[4] = gen_reg_rtx (<MODE>mode);
-  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1);
+  rtx tmp1 = gen_reg_rtx (<MODE>mode);
+  rtx tmp2 = gen_reg_rtx (<MODE>mode);
+  rtx tmp3 = gen_reg_rtx (<MODE>mode);
+
+  if (TARGET_POPCNTD)
+    {
+      emit_insn (gen_add<mode>3 (tmp1, operands[1], constm1_rtx));
+      emit_insn (gen_one_cmpl<mode>2 (tmp2, operands[1]));
+      emit_insn (gen_and<mode>3 (tmp3, tmp1, tmp2));
+      emit_insn (gen_popcntd<mode>2 (operands[0], tmp3));
+    }
+  else
+    {
+      emit_insn (gen_neg<mode>2 (tmp1, operands[1]));
+      emit_insn (gen_and<mode>3 (tmp2, operands[1], tmp1));
+      emit_insn (gen_clz<mode>2 (tmp3, tmp2));
+      emit_insn (gen_sub<mode>3 (operands[0], GEN_INT (<bits> - 1), tmp3));
+    }
+
+  DONE;
 })
 
 (define_insn "ctz<mode>2_hw"
@@ -2253,23 +2260,18 @@ (define_insn "ctz<mode>2_hw"
   [(set_attr "type" "cntlz")])
 
 (define_expand "ffs<mode>2"
-  [(set (match_dup 2)
-	(neg:GPR (match_operand:GPR 1 "gpc_reg_operand" "")))
-   (set (match_dup 3)
-	(and:GPR (match_dup 1)
-		 (match_dup 2)))
-   (set (match_dup 4)
-	(clz:GPR (match_dup 3)))
-   (parallel [(set (match_operand:GPR 0 "gpc_reg_operand" "")
-		   (minus:GPR (match_dup 5)
-			      (match_dup 4)))
-	      (clobber (reg:GPR CA_REGNO))])]
+  [(set (match_operand:GPR 0 "gpc_reg_operand")
+	(ffs:GPR (match_operand:GPR 1 "gpc_reg_operand")))]
   ""
 {
-  operands[2] = gen_reg_rtx (<MODE>mode);
-  operands[3] = gen_reg_rtx (<MODE>mode);
-  operands[4] = gen_reg_rtx (<MODE>mode);
-  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
+  rtx tmp1 = gen_reg_rtx (<MODE>mode);
+  rtx tmp2 = gen_reg_rtx (<MODE>mode);
+  rtx tmp3 = gen_reg_rtx (<MODE>mode);
+  emit_insn (gen_neg<mode>2 (tmp1, operands[1]));
+  emit_insn (gen_and<mode>3 (tmp2, operands[1], tmp1));
+  emit_insn (gen_clz<mode>2 (tmp3, tmp2));
+  emit_insn (gen_sub<mode>3 (operands[0], GEN_INT (<bits>), tmp3));
+  DONE;
 })
 
 
-- 
1.9.3



More information about the Gcc-patches mailing list