This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[RX] Fix PR 83831 -- Unused bclr, bnot, bset insns


Hi,

The attached patch fixes the deficits mentioned in PR 83831 which is
about unused bclr, bnot and bset instructions.

For some simple cases, the combine pass can successfully fuse a load-
modify-store insn sequence into an insn that operates on a memory
directly.  However, in some cases where it thinks it's too complex, it
will not try to combine the insns.

What I'm doing here is similar to what I've been doing on SH in the
split1 pass after combine -- manually walking the insns up/down
(limited to the BB) to find the def/use and fuse 3 insns into 1, if
it's possible to do so.

For that I've copy-pasted some of the RTL utility functions from SH.  I
will propose folding and moving those into rtl.h / rtlanal.c during
next stage 1.

With that patch, I get a code size decrease of about 1 KByte on a
larger application.

The attached patch is the version for GCC 8 (trunk).  I've posted
versions for GCC 6 and GCC 7 in the PR.  All 3 patches have been tested
with 
   "make -k check" on rx-sim for c and c++

with no new failures.

OK for trunk?

Cheers,
Oleg

gcc/ChangeLog:

	PR target/83831
	* config/rx/rx-protos.h (rx_reg_dead_or_unused_after_insn,
	rx_copy_reg_dead_or_unused_notes, rx_fuse_in_memory_bitop): New
	declarations.
	(set_of_reg): New struct.
	(rx_find_set_of_reg, rx_find_use_of_reg): New functions.
	* config/rx/rx.c (rx_reg_dead_or_unused_after_insn,
	rx_copy_reg_dead_or_unused_notes, rx_fuse_in_memory_bitop): New
	functions.
	* config/rx/rx.md (andsi3, iorsi3, xorsi3): Convert to insn_and_split.
	Split into bitclr, bitset, bitinvert patterns if appropriate.
	(*bitset, *bitinvert, *bitclr): Convert to named insn_and_split and
	use rx_fuse_in_memory_bitop.
	(*bitset_in_memory, *bitinvert_in_memory, *bitclr_in_memory): Convert
	to named insn, correct maximum insn length.

gcc/testsuite/ChangeLog:

	PR target/83831
	* gcc.target/rx/pr83831.c: New tests.
Index: gcc/config/rx/rx-protos.h
===================================================================
--- gcc/config/rx/rx-protos.h	(revision 257549)
+++ gcc/config/rx/rx-protos.h	(working copy)
@@ -63,6 +63,112 @@
 extern void		rx_split_cbranch (machine_mode, enum rtx_code,
 					  rtx, rtx, rtx);
 extern machine_mode	rx_select_cc_mode (enum rtx_code, rtx, rtx);
+
+extern bool rx_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno);
+extern void rx_copy_reg_dead_or_unused_notes (rtx reg, const rtx_insn* src,
+					      rtx_insn* dst);
+
+extern bool rx_fuse_in_memory_bitop (rtx* operands, rtx_insn* curr_insn,
+				     rtx (*gen_insn)(rtx, rtx));
+
+/* Result value of rx_find_set_of_reg.  */
+struct set_of_reg
+{
+  /* The insn where sh_find_set_of_reg stopped looking.
+     Can be NULL_RTX if the end of the insn list was reached.  */
+  rtx_insn* insn;
+
+  /* The set rtx of the specified reg if found, NULL_RTX otherwise.  */
+  const_rtx set_rtx;
+
+  /* The set source rtx of the specified reg if found, NULL_RTX otherwise.
+     Usually, this is the most interesting return value.  */
+  rtx set_src;
+};
+
+/* FIXME: Copy-pasta from SH.  Move to rtl.h.
+   Given a reg rtx and a start insn, try to find the insn that sets
+   the specified reg by using the specified insn stepping function,
+   such as 'prev_nonnote_nondebug_insn_bb'.  When the insn is found,
+   try to extract the rtx of the reg set.  */
+template <typename F> inline set_of_reg
+rx_find_set_of_reg (rtx reg, rtx_insn* insn, F stepfunc,
+		    bool ignore_reg_reg_copies = false)
+{
+  set_of_reg result;
+  result.insn = insn;
+  result.set_rtx = NULL_RTX;
+  result.set_src = NULL_RTX;
+
+  if (!REG_P (reg) || insn == NULL_RTX)
+    return result;
+
+  for (rtx_insn* i = stepfunc (insn); i != NULL_RTX; i = stepfunc (i))
+    {
+      if (BARRIER_P (i))
+	break;
+      if (!INSN_P (i) || DEBUG_INSN_P (i))
+	  continue;
+      if (reg_set_p (reg, i))
+	{
+	  if (CALL_P (i))
+	    break;
+
+	  result.insn = i;
+	  result.set_rtx = set_of (reg, i);
+
+	  if (result.set_rtx == NULL_RTX || GET_CODE (result.set_rtx) != SET)
+	    break;
+
+	  result.set_src = XEXP (result.set_rtx, 1);
+
+	  if (ignore_reg_reg_copies && REG_P (result.set_src))
+	    {
+	      reg = result.set_src;
+	      continue;
+	    }
+	  if (ignore_reg_reg_copies && SUBREG_P (result.set_src)
+	      && REG_P (SUBREG_REG (result.set_src)))
+	    {
+	      reg = SUBREG_REG (result.set_src);
+	      continue;
+	    }
+
+	  break;
+	}
+    }
+
+  /* If the searched reg is found inside a (mem (post_inc:SI (reg))), set_of
+     will return NULL and set_rtx will be NULL.
+     In this case report a 'not found'.  result.insn will always be non-null
+     at this point, so no need to check it.  */
+  if (result.set_src != NULL && result.set_rtx == NULL)
+    result.set_src = NULL;
+
+  return result;
+}
+
+/* FIXME: Move to rtlh.h.  */
+template <typename F> inline rtx_insn*
+rx_find_use_of_reg (rtx reg, rtx_insn* insn, F stepfunc)
+{
+  if (!REG_P (reg) || insn == NULL_RTX)
+    return NULL;
+
+  for (rtx_insn* i = stepfunc (insn); i != NULL_RTX; i = stepfunc (i))
+    {
+      if (BARRIER_P (i))
+	break;
+      if (!INSN_P (i) || DEBUG_INSN_P (i))
+	continue;
+      if (reg_overlap_mentioned_p (reg, PATTERN (i))
+	  || (CALL_P (i) && find_reg_fusage (i, USE, reg)))
+	return i;
+    }
+
+  return NULL;
+}
+
 #endif
 
 #endif /* GCC_RX_PROTOS_H */
Index: gcc/config/rx/rx.c
===================================================================
--- gcc/config/rx/rx.c	(revision 257549)
+++ gcc/config/rx/rx.c	(working copy)
@@ -3439,6 +3439,88 @@
     emit_insn (gen_mvtc (GEN_INT (CTRLREG_PSW), m_prev_psw_reg));
 }
 
+/* Given an insn and a reg number, tell whether the reg dies or is unused
+   after the insn.  */
+bool
+rx_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno)
+{
+  return find_regno_note (i, REG_DEAD, regno) != NULL
+	 || find_regno_note (i, REG_UNUSED, regno) != NULL;
+}
+
+/* Copy dead and unused notes from SRC to DST for the specified REGNO.  */
+void
+rx_copy_reg_dead_or_unused_notes (rtx reg, const rtx_insn* src, rtx_insn* dst)
+{
+  int regno = REGNO (SUBREG_P (reg) ? SUBREG_REG (reg) : reg);
+
+  if (rtx note = find_regno_note (src, REG_DEAD, regno))
+    add_shallow_copy_of_reg_note (dst, note);
+
+  if (rtx note = find_regno_note (src, REG_UNUSED, regno))
+    add_shallow_copy_of_reg_note (dst, note);
+}
+
+/* Try to fuse the current bit-operation insn with the surrounding memory load
+   and store.  */
+bool
+rx_fuse_in_memory_bitop (rtx* operands, rtx_insn* curr_insn,
+			 rtx (*gen_insn)(rtx, rtx))
+{
+  rtx op2_reg = SUBREG_P (operands[2]) ? SUBREG_REG (operands[2]) : operands[2];
+
+  set_of_reg op2_def = rx_find_set_of_reg (op2_reg, curr_insn,
+					   prev_nonnote_nondebug_insn_bb);
+  if (op2_def.set_src == NULL_RTX
+      || !MEM_P (op2_def.set_src)
+      || GET_MODE (op2_def.set_src) != QImode
+      || !rx_is_restricted_memory_address (XEXP (op2_def.set_src, 0),
+					   GET_MODE (op2_def.set_src))
+      || reg_used_between_p (operands[2], op2_def.insn, curr_insn)
+      || !rx_reg_dead_or_unused_after_insn (curr_insn, REGNO (op2_reg))
+    )
+    return false;
+
+  /* The register operand originates from a memory load and the memory load
+     could be fused with the bitop insn.
+     Look for the following memory store with the same memory operand.  */
+  rtx mem = op2_def.set_src;
+
+  /* If the memory is an auto-mod address, it can't be fused.  */
+  if (GET_CODE (XEXP (mem, 0)) == POST_INC
+      || GET_CODE (XEXP (mem, 0)) == PRE_INC
+      || GET_CODE (XEXP (mem, 0)) == POST_DEC
+      || GET_CODE (XEXP (mem, 0)) == PRE_DEC)
+    return false;
+
+  rtx_insn* op0_use = rx_find_use_of_reg (operands[0], curr_insn,
+					  next_nonnote_nondebug_insn_bb);
+  if (op0_use == NULL
+      || !(GET_CODE (PATTERN (op0_use)) == SET
+	   && RX_REG_P (XEXP (PATTERN (op0_use), 1))
+	   && reg_overlap_mentioned_p (operands[0], XEXP (PATTERN (op0_use), 1))
+	   && rtx_equal_p (mem, XEXP (PATTERN (op0_use), 0)))
+      || !rx_reg_dead_or_unused_after_insn (op0_use, REGNO (operands[0]))
+      || reg_set_between_p (operands[2], curr_insn, op0_use))
+    return false;
+
+  /* If the load-modify-store operation is fused it could potentially modify
+     load/store ordering if there are other memory accesses between the load
+     and the store for this insn.  If there are volatile mems between the load
+     and store it's better not to change the ordering.  If there is a call
+     between the load and store, it's also not safe to fuse it.  */
+  for (rtx_insn* i = next_nonnote_nondebug_insn_bb (op2_def.insn);
+       i != NULL && i != op0_use;
+       i = next_nonnote_nondebug_insn_bb (i))
+    if (volatile_insn_p (PATTERN (i)) || CALL_P (i))
+      return false;
+
+  emit_insn (gen_insn (mem, operands[1]));
+  set_insn_deleted (op2_def.insn);
+  set_insn_deleted (op0_use);
+  return true;
+}
+
 /* Implement TARGET_HARD_REGNO_NREGS.  */
 
 static unsigned int
Index: gcc/config/rx/rx.md
===================================================================
--- gcc/config/rx/rx.md	(revision 257549)
+++ gcc/config/rx/rx.md	(working copy)
@@ -1094,7 +1094,7 @@
   DONE;
 })
 
-(define_insn "andsi3"
+(define_insn_and_split "andsi3"
   [(set (match_operand:SI         0 "register_operand"  "=r,r,r,r,r,r,r,r,r")
 	(and:SI (match_operand:SI 1 "register_operand"  "%0,0,0,0,0,0,r,r,0")
 		(match_operand:SI 2 "rx_source_operand" "r,Uint04,Sint08,Sint16,Sint24,i,0,r,Q")))
@@ -1110,6 +1110,21 @@
   and\t%1, %0
   and\t%2, %1, %0
   and\t%Q2, %0"
+  "&& RX_REG_P (operands[1]) && CONST_INT_P (operands[2])
+   && pow2p_hwi (~UINTVAL (operands[2]))"
+ [(const_int 0)]
+{
+  /* For negated single bit constants use the bclr insn for smaller code.  */
+
+  if (!rx_reg_dead_or_unused_after_insn (curr_insn, CC_REG))
+    FAIL;
+
+  rx_copy_reg_dead_or_unused_notes (operands[1], curr_insn,
+    emit_insn (gen_bitclr (operands[0],
+			   GEN_INT (exact_log2 (~UINTVAL (operands[2]))),
+			   operands[1])));
+  DONE;
+}
   [(set_attr "timings" "11,11,11,11,11,11,11,11,33")
    (set_attr "length" "2,2,3,4,5,6,2,5,5")]
 )
@@ -1383,7 +1398,7 @@
   [(set_attr "length" "2,3")]
 )
 
-(define_insn "iorsi3"
+(define_insn_and_split "iorsi3"
   [(set (match_operand:SI         0 "register_operand" "=r,r,r,r,r,r,r,r,r")
 	(ior:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,r,r,0")
 	        (match_operand:SI 2 "rx_source_operand" "r,Uint04,Sint08,Sint16,Sint24,i,0,r,Q")))
@@ -1399,6 +1414,21 @@
   or\t%1, %0
   or\t%2, %1, %0
   or\t%Q2, %0"
+  "&& RX_REG_P (operands[1]) && CONST_INT_P (operands[2])
+   && pow2p_hwi (UINTVAL (operands[2]))"
+  [(const_int 0)]
+{
+  /* For single bit constants use the bset insn for smaller code.  */
+
+  if (!rx_reg_dead_or_unused_after_insn (curr_insn, CC_REG))
+    FAIL;
+
+  rx_copy_reg_dead_or_unused_notes (operands[1], curr_insn,
+    emit_insn (gen_bitset (operands[0],
+			   GEN_INT (exact_log2 (UINTVAL (operands[2]))),
+			   operands[1])));
+  DONE;
+}
   [(set_attr "timings" "11,11,11,11,11,11,11,11,33")
    (set_attr "length"  "2,2,3,4,5,6,2,3,5")]
 )
@@ -1704,7 +1734,7 @@
   DONE;
 })
 
-(define_insn "xorsi3"
+(define_insn_and_split "xorsi3"
   [(set (match_operand:SI         0 "register_operand" "=r,r,r,r,r,r")
 	(xor:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0")
 	        (match_operand:SI 2 "rx_source_operand"
@@ -1712,6 +1742,21 @@
    (clobber (reg:CC CC_REG))]
   ""
   "xor\t%Q2, %0"
+  "&& RX_REG_P (operands[1]) && CONST_INT_P (operands[2])
+   && pow2p_hwi (UINTVAL (operands[2]))"
+  [(const_int 0)]
+{
+  /* For single bit constants use the bnot insn for smaller code.  */
+
+  if (!rx_reg_dead_or_unused_after_insn (curr_insn, CC_REG))
+    FAIL;
+
+  rx_copy_reg_dead_or_unused_notes (operands[1], curr_insn,
+    emit_insn (gen_bitinvert (operands[0],
+			      GEN_INT (exact_log2 (UINTVAL (operands[2]))),
+			      operands[1])));
+  DONE;
+}
   [(set_attr "timings" "11,11,11,11,11,33")
    (set_attr "length" "3,4,5,6,7,6")]
 )
@@ -1960,19 +2005,16 @@
 
 ;; Bit manipulation instructions.
 
-;; ??? The *_in_memory patterns will not be matched without further help.
-;; At one time we had the insv expander generate them, but I suspect that
-;; in general we get better performance by exposing the register load to
-;; the optimizers.
-;;
-;; An alternate solution would be to re-organize these patterns such
-;; that allow both register and memory operands.  This would allow the
-;; register allocator to spill and not load the register operand.  This
-;; would be possible only for operations for which we have a constant
-;; bit offset, so that we can adjust the address by ofs/8 and replace
-;; the offset in the insn by ofs%8.
+;; The *_in_memory patterns will not be matched automatically, not even with
+;; combiner bridge patterns.  Especially when the memory operands have a
+;; displacement, the resulting patterns look too complex.
+;; Instead we manually look around the matched insn to see if there is a
+;; preceeding memory load and a following memory store of the modified register
+;; which can be fused into the single *_in_memory insn.
+;; Do that before register allocation, as it can eliminate one temporary
+;; register that needs to be allocated.
 
-(define_insn "*bitset"
+(define_insn_and_split "bitset"
   [(set (match_operand:SI                    0 "register_operand" "=r")
 	(ior:SI (ashift:SI (const_int 1)
 			   (match_operand:SI 1 "rx_shift_operand" "ri"))
@@ -1979,10 +2021,18 @@
 		(match_operand:SI            2 "register_operand" "0")))]
   ""
   "bset\t%1, %0"
+  "&& can_create_pseudo_p ()"
+  [(const_int 0)]
+{
+  if (rx_fuse_in_memory_bitop (operands, curr_insn, &gen_bitset_in_memory))
+    DONE;
+  else
+    FAIL;
+}
   [(set_attr "length" "3")]
 )
 
-(define_insn "*bitset_in_memory"
+(define_insn "bitset_in_memory"
   [(set (match_operand:QI                    0 "rx_restricted_mem_operand" "+Q")
 	(ior:QI (ashift:QI (const_int 1)
 			   (match_operand:QI 1 "nonmemory_operand" "ri"))
@@ -1989,11 +2039,11 @@
 		(match_dup 0)))]
   ""
   "bset\t%1, %0.B"
-  [(set_attr "length" "3")
+  [(set_attr "length" "5")
    (set_attr "timings" "33")]
 )
 
-(define_insn "*bitinvert"
+(define_insn_and_split "bitinvert"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(xor:SI (ashift:SI (const_int 1)
 			   (match_operand:SI 1 "rx_shift_operand" "ri"))
@@ -2000,10 +2050,18 @@
 		(match_operand:SI 2 "register_operand" "0")))]
   ""
   "bnot\t%1, %0"
+  "&& can_create_pseudo_p ()"
+  [(const_int 0)]
+{
+  if (rx_fuse_in_memory_bitop (operands, curr_insn, &gen_bitinvert_in_memory))
+    DONE;
+  else
+    FAIL;
+}
   [(set_attr "length" "3")]
 )
 
-(define_insn "*bitinvert_in_memory"
+(define_insn "bitinvert_in_memory"
   [(set (match_operand:QI 0 "rx_restricted_mem_operand" "+Q")
 	(xor:QI (ashift:QI (const_int 1)
 			   (match_operand:QI 1 "nonmemory_operand" "ri"))
@@ -2014,7 +2072,7 @@
    (set_attr "timings" "33")]
 )
 
-(define_insn "*bitclr"
+(define_insn_and_split "bitclr"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(and:SI (not:SI
 		  (ashift:SI
@@ -2023,10 +2081,18 @@
 		(match_operand:SI 2 "register_operand" "0")))]
   ""
   "bclr\t%1, %0"
+  "&& can_create_pseudo_p ()"
+  [(const_int 0)]
+{
+  if (rx_fuse_in_memory_bitop (operands, curr_insn, &gen_bitclr_in_memory))
+    DONE;
+  else
+    FAIL;
+}
   [(set_attr "length" "3")]
 )
 
-(define_insn "*bitclr_in_memory"
+(define_insn "bitclr_in_memory"
   [(set (match_operand:QI 0 "rx_restricted_mem_operand" "+Q")
 	(and:QI (not:QI
 		  (ashift:QI
@@ -2035,7 +2101,7 @@
 		(match_dup 0)))]
   ""
   "bclr\t%1, %0.B"
-  [(set_attr "length" "3")
+  [(set_attr "length" "5")
    (set_attr "timings" "33")]
 )
 
Index: gcc/testsuite/gcc.target/rx/pr83831.c
===================================================================
--- gcc/testsuite/gcc.target/rx/pr83831.c	(nonexistent)
+++ gcc/testsuite/gcc.target/rx/pr83831.c	(working copy)
@@ -0,0 +1,77 @@
+/* { dg-do compile }  */
+/* { dg-options "-O1" }  */
+/* { dg-final { scan-assembler-times "bclr" 6 } }  */
+/* { dg-final { scan-assembler-times "bset" 6 } }  */
+/* { dg-final { scan-assembler-times "bnot" 6 } }  */
+
+void
+test_0 (char* x, unsigned int y)
+{
+  /* Expect 4x bclr here.  */
+  x[0] &= 0xFE;
+  x[1] = y & ~(1 << 1);
+  x[2] &= 0xFE;
+  x[65000] &= 0xFE;
+}
+
+unsigned int
+test_1 (unsigned int x, unsigned int y)
+{
+  /* Expect 1x bclr here.  */
+  return x & ~(1 << y);
+}
+
+unsigned int
+test_2 (unsigned int x)
+{
+  /* Expect 1x bclr here.  */
+  return x & ~(1 << 1);
+}
+
+void
+test_3 (char* x, unsigned int y)
+{
+  /* Expect 4x bset here.  */
+  x[0] |= 0x10;
+  x[1] = y | (1 << 1);
+  x[2] |= 0x10;
+  x[65000] |= 0x10;
+}
+
+unsigned int
+test_4 (unsigned int x, unsigned int y)
+{
+  /* Expect 1x bset here.  */
+  return x | (1 << y);
+}
+
+unsigned int
+test_5 (unsigned int x)
+{
+  /* Expect 1x bset here.  */
+  return x | (1 << 8);
+}
+
+void
+test_6 (char* x, unsigned int y)
+{
+  /* Expect 4x bnot here.  */
+  x[0] ^= 0x10;
+  x[1] = y ^ (1 << 1);
+  x[2] ^= 0x10;
+  x[65000] ^= 0x10;
+}
+
+unsigned int
+test_7 (unsigned int x, unsigned int y)
+{
+  /* Expect 1x bnot here.  */
+  return x ^ (1 << y);
+}
+
+unsigned int
+test_8 (unsigned int x)
+{
+  /* Expect 1x bnot here.  */
+  return x ^ (1 << 8);
+}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]