This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[SH] PR 53568 - Add support for bswap built-ins


Hello,

The attached patch improves code generated for byte swap expressions
such as ((x & 0xFF) << 8) | ((x >> 8) & 0xFF).
It seems that currently the tree optimizers only detect bswap32 and
bswap64 but not bswap16 patterns.  The patch adds detection for bswap16
patterns by playing along with the combine pass.

Tested with 
make -k -j8 check RUNTESTFLAGS="--target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a/-mb,-m2a-single/-mb,-m4/-ml,
-m4/-mb,-m4-single/-ml,-m4-single/-mb,-m4a-single/-ml,
-m4a-single/-mb}"

and no new failures.
Test cases for this patch and the previous bswap32 patch will follow
shortly.

Cheers,
Oleg

ChangeLog:

	PR target/53568
	* config/sh/sh.md: Add peephole for swapbsi2. 
	(*swapbisi2_and_shl8, *swapbhisi2): New insns and splits.
Index: gcc/config/sh/sh.md
===================================================================
--- gcc/config/sh/sh.md	(revision 188525)
+++ gcc/config/sh/sh.md	(working copy)
@@ -4561,6 +4561,81 @@
   "swap.b	%1,%0"
   [(set_attr "type" "arith")])
 
+;; The *swapbisi2_and_shl8 pattern helps the combine pass simplifying
+;; partial byte swap expressions such as...
+;;   ((x & 0xFF) << 8) | ((x >> 8) & 0xFF).
+;; ...which are currently not handled by the tree optimizers.
+;; The combine pass will not initially try to combine the full expression,
+;; but only some sub-expressions.  In such a case the *swapbisi2_and_shl8
+;; pattern acts as an intermediate pattern that will eventually lead combine
+;; to the swapbsi2 pattern above.
+;; As a side effect this also improves code that does (x & 0xFF) << 8
+;; or (x << 8) & 0xFF00.
+(define_insn_and_split "*swapbisi2_and_shl8"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(ior:SI (and:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand" "r")
+				   (const_int 8))
+			(const_int 65280))
+		(match_operand:SI 2 "arith_reg_operand" "r")))]
+  "TARGET_SH1 && ! reload_in_progress && ! reload_completed"
+  "#"
+  "&& can_create_pseudo_p ()"
+  [(const_int 0)]
+{
+  rtx tmp0 = gen_reg_rtx (SImode);
+  rtx tmp1 = gen_reg_rtx (SImode);
+
+  emit_insn (gen_zero_extendqisi2 (tmp0, gen_lowpart (QImode, operands[1])));
+  emit_insn (gen_swapbsi2 (tmp1, tmp0));
+  emit_insn (gen_iorsi3 (operands[0], tmp1, operands[2]));
+  DONE;
+})
+
+;; The *swapbhisi2 pattern is, like the *swapbisi2_and_shl8 pattern, another
+;; intermediate pattern that will help the combine pass arriving at swapbsi2.
+(define_insn_and_split "*swapbhisi2"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+	(ior:SI (and:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand" "r")
+				   (const_int 8))
+			(const_int 65280))
+		(zero_extract:SI (match_dup 1) (const_int 8) (const_int 8))))]
+  "TARGET_SH1 && ! reload_in_progress && ! reload_completed"
+  "#"
+  "&& can_create_pseudo_p ()"
+  [(const_int 0)]
+{
+  rtx tmp = gen_reg_rtx (SImode);
+
+  emit_insn (gen_zero_extendhisi2 (tmp, gen_lowpart (HImode, operands[1])));
+  emit_insn (gen_swapbsi2 (operands[0], tmp));
+  DONE;
+})
+
+;; In some cases the swapbsi2 pattern might leave a sequence such as...
+;;   swap.b  r4,r4
+;;   mov     r4,r0
+;;
+;; which can be simplified to...
+;;   swap.b  r4,r0
+(define_peephole2
+  [(set (match_operand:SI 0 "arith_reg_dest" "")
+	(ior:SI (and:SI (match_operand:SI 1 "arith_reg_operand" "")
+			(const_int 4294901760))
+		(ior:SI (and:SI (ashift:SI (match_dup 1) (const_int 8))
+				(const_int 65280))
+			(and:SI (ashiftrt:SI (match_dup 1) (const_int 8))
+				(const_int 255)))))
+   (set (match_operand:SI 2 "arith_reg_dest" "")
+	(match_dup 0))]
+  "TARGET_SH1 && peep2_reg_dead_p (2, operands[0])"
+  [(set (match_dup 2)
+	(ior:SI (and:SI (match_operand:SI 1 "arith_reg_operand" "")
+			(const_int 4294901760))
+		(ior:SI (and:SI (ashift:SI (match_dup 1) (const_int 8))
+				(const_int 65280))
+			(and:SI (ashiftrt:SI (match_dup 1) (const_int 8))
+				(const_int 255)))))])
+
 
 ;; -------------------------------------------------------------------------
 ;; Zero extension instructions

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]