[PATCH] match.pd: undistribute (a << s) & C, when C = (M << s) and exact_log2(M + 1) >= 0

Philipp Tomsich philipp.tomsich@vrull.eu
Wed Nov 11 10:17:32 GMT 2020


From: Philipp Tomsich <prt@gnu.org>

The function
    long f(long a)
    {
    	return(a & 0xFFFFFFFFull) << 3;
    }
is folded into
    _1 = a_2(D) << 3;
    _3 = _1 & 34359738360;
whereas the construction
    return (a & 0xFFFFFFFFull) * 8;
results in
    _1 = a_2(D) & 4294967295;
    _3 = _1 * 8;

This leads to suboptimal code-generation for RISC-V (march=rv64g), as
the shifted constant needs to be expanded into 3 RTX, in addition to
the 2 RTX (one each for the LSHIFT_EXPR and the BIT_AND_EXPR), which
overwhelms the combine pass (a sequence of 5 RTX is not considered):
	li	a5,1		# tmp78,	# 23	[c=4 l=4]  *movdi_64bit/1
	slli	a5,a5,35	#, tmp79, tmp78	# 24	[c=4 l=4]  ashldi3
	addi	a5,a5,-8	#, tmp77, tmp79	# 9	[c=4 l=4]  adddi3/1
	slli	a0,a0,3		#, tmp76, tmp80	# 6	[c=4 l=4]  ashldi3
	and	a0,a0,a5	# tmp77,, tmp76	# 15	[c=4 l=4]  anddi3/0
	ret			# 28	[c=0 l=4]  simple_return
instead of:
	slli	a0,a0,32	#, tmp76, tmp79	# 26	[c=4 l=4]  ashldi3
	srli	a0,a0,29	#,, tmp76	# 27	[c=4 l=4]  lshrdi3
	ret			    		# 24	[c=0 l=4]  simple_return

We address this by adding a simplification for
   (a << s) & M, where ((M >> s) << s) == M
to
   (a & M_unshifted) << s, where M_unshifted := (M >> s)
which undistributes the LSHIFT.

Signed-off-by: Philipp Tomsich <prt@gnu.org>
---
 gcc/match.pd                            | 11 +++++++++--
 gcc/testsuite/gcc.target/riscv/zextws.c | 18 ++++++++++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zextws.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 349eab6..6bb9535 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3079,6 +3079,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	     }
 	 }
      }
+    (if (GIMPLE && (((mask >> shiftc) << shiftc) == mask)
+	        && (exact_log2((mask >> shiftc) + 1) >= 0)
+	        && (shift == LSHIFT_EXPR))
+	 (with
+	  { tree newmaskt = build_int_cst_type(TREE_TYPE (@2), mask >> shiftc); }
+	  (shift (convert (bit_and:shift_type (convert @0) { newmaskt; })) @1))
      /* ((X << 16) & 0xff00) is (X, 0).  */
      (if ((mask & zerobits) == mask)
       { build_int_cst (type, 0); }
@@ -3100,7 +3106,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	   (if (!tree_int_cst_equal (newmaskt, @2))
 	    (if (shift_type != TREE_TYPE (@3))
 	     (bit_and (convert (shift:shift_type (convert @3) @1)) { newmaskt; })
-	     (bit_and @4 { newmaskt; })))))))))))))
+	     (bit_and @4 { newmaskt; }))))))))))))))
 
 /* Fold (X {&,^,|} C2) << C1 into (X << C1) {&,^,|} (C2 << C1)
    (X {&,^,|} C2) >> C1 into (X >> C1) & (C2 >> C1).  */
@@ -3108,7 +3114,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (for bit_op (bit_and bit_xor bit_ior)
   (simplify
    (shift (convert?:s (bit_op:s @0 INTEGER_CST@2)) INTEGER_CST@1)
-   (if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
+   (if (tree_nop_conversion_p (type, TREE_TYPE (@0))
+        && !wi::exact_log2(wi::to_wide(@2) + 1))
     (with { tree mask = int_const_binop (shift, fold_convert (type, @2), @1); }
      (bit_op (shift (convert @0) @1) { mask; }))))))
 
diff --git a/gcc/testsuite/gcc.target/riscv/zextws.c b/gcc/testsuite/gcc.target/riscv/zextws.c
new file mode 100644
index 0000000..8ac93f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zextws.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64g -mabi=lp64 -O2" } */
+
+/* Test for
+     (a << s) & M, where ((M >> s) << s) == M
+   being undistributed into
+     (a & M_unshifted) << s, where M_unshifted := (M >> s)
+   to produce the sequence (or similar)
+     slli	a0,a0,32
+     srli	a0,a0,29
+*/
+long
+zextws_mask (long i)
+{
+  return (i & 0xffffffffULL) << 3;
+}
+/* { dg-final { scan-assembler "slli" } } */
+/* { dg-final { scan-assembler "srli" } } */
-- 
1.8.3.1



More information about the Gcc-patches mailing list