[x86_64 PATCH] PR target/11877: Use xor to write zero to memory with -Os

Jakub Jelinek jakub@redhat.com
Mon Jun 21 10:28:35 GMT 2021


On Mon, Jun 21, 2021 at 12:14:09PM +0200, Richard Biener wrote:
> > But we could do what I've done in
> > r11-7694-gd55ce33a34a8e33d17285228b32cf1e564241a70
> > - have int ix86_last_zero_store_uid;
> > set to INSN_UID of the last store emitted by the peephole2s and
> > then check that INSN_UID against the var.
> 
> Hmm, or have reg_nonzero_bits_for_peephole2 () and maintain
> that somehow ... (conservatively drop it when a SET is seen).

Maintaining something in peephole2 wouldn't be that easy because
of peephole2's rolling window, plus it would need to be done
in the generic code even though only a single target needs it,
and only in a specific case.

The following seems to work.

2021-06-21  Jakub Jelinek  <jakub@redhat.com>

	PR target/11877
	* config/i386/i386-protos.h (ix86_last_zero_store_uid): Declare.
	* config/i386/i386-expand.c (ix86_last_zero_store_uid): New variable.
	* config/i386/i386.c (ix86_expand_prologue): Clear it.
	* config/i386/i386.md (peephole2s for 1/2/4 stores of const0_rtx):
	Remove "" from match_operand.  Emit new insns using emit_move_insn and
	set ix86_last_zero_store_uid to INSN_UID of the last store.
	Add peephole2s for 1/2/4 stores of const0_rtx following previous
	successful peep2s.

--- gcc/config/i386/i386-protos.h.jj	2021-06-21 11:59:16.769693735 +0200
+++ gcc/config/i386/i386-protos.h	2021-06-21 12:01:47.875691930 +0200
@@ -111,6 +111,7 @@ extern bool ix86_use_lea_for_mov (rtx_in
 extern bool ix86_avoid_lea_for_addr (rtx_insn *, rtx[]);
 extern void ix86_split_lea_for_addr (rtx_insn *, rtx[], machine_mode);
 extern bool ix86_lea_for_add_ok (rtx_insn *, rtx[]);
+extern int ix86_last_zero_store_uid;
 extern bool ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high);
 extern bool ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn);
 extern bool ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn);
--- gcc/config/i386/i386-expand.c.jj	2021-06-21 09:39:21.604488082 +0200
+++ gcc/config/i386/i386-expand.c	2021-06-21 12:21:33.017977951 +0200
@@ -1316,6 +1316,9 @@ find_nearest_reg_def (rtx_insn *insn, in
   return false;
 }
 
+/* INSN_UID of the last insn emitted by zero store peephole2s.  */
+int ix86_last_zero_store_uid;
+
 /* Split lea instructions into a sequence of instructions
    which are executed on ALU to avoid AGU stalls.
    It is assumed that it is allowed to clobber flags register
--- gcc/config/i386/i386.c.jj	2021-06-21 09:39:21.622487840 +0200
+++ gcc/config/i386/i386.c	2021-06-21 12:06:54.049634337 +0200
@@ -8196,6 +8196,7 @@ ix86_expand_prologue (void)
   bool save_stub_call_needed;
   rtx static_chain = NULL_RTX;
 
+  ix86_last_zero_store_uid = 0;
   if (ix86_function_naked (current_function_decl))
     {
       if (flag_stack_usage_info)
--- gcc/config/i386/i386.md.jj	2021-06-21 09:42:04.086303699 +0200
+++ gcc/config/i386/i386.md	2021-06-21 12:14:10.411847549 +0200
@@ -19360,37 +19360,96 @@ (define_peephole2
 ;; When optimizing for size, zeroing memory should use a register.
 (define_peephole2
   [(match_scratch:SWI48 0 "r")
-   (set (match_operand:SWI48 1 "memory_operand" "") (const_int 0))
-   (set (match_operand:SWI48 2 "memory_operand" "") (const_int 0))
-   (set (match_operand:SWI48 3 "memory_operand" "") (const_int 0))
-   (set (match_operand:SWI48 4 "memory_operand" "") (const_int 0))]
+   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
+   (set (match_operand:SWI48 2 "memory_operand") (const_int 0))
+   (set (match_operand:SWI48 3 "memory_operand") (const_int 0))
+   (set (match_operand:SWI48 4 "memory_operand") (const_int 0))]
   "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
-  [(set (match_dup 1) (match_dup 0))
-   (set (match_dup 2) (match_dup 0))
-   (set (match_dup 3) (match_dup 0))
-   (set (match_dup 4) (match_dup 0))]
+  [(const_int 0)]
 {
   ix86_expand_clear (operands[0]);
+  emit_move_insn (operands[1], operands[0]);
+  emit_move_insn (operands[2], operands[0]);
+  emit_move_insn (operands[3], operands[0]);
+  ix86_last_zero_store_uid
+    = INSN_UID (emit_move_insn (operands[4], operands[0]));
+  DONE;
 })
 
 (define_peephole2
   [(match_scratch:SWI48 0 "r")
-   (set (match_operand:SWI48 1 "memory_operand" "") (const_int 0))
-   (set (match_operand:SWI48 2 "memory_operand" "") (const_int 0))]
+   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
+   (set (match_operand:SWI48 2 "memory_operand") (const_int 0))]
   "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
-  [(set (match_dup 1) (match_dup 0))
-   (set (match_dup 2) (match_dup 0))]
+  [(const_int 0)]
 {
   ix86_expand_clear (operands[0]);
+  emit_move_insn (operands[1], operands[0]);
+  ix86_last_zero_store_uid
+    = INSN_UID (emit_move_insn (operands[2], operands[0]));
+  DONE;
 })
 
 (define_peephole2
   [(match_scratch:SWI48 0 "r")
-   (set (match_operand:SWI48 1 "memory_operand" "") (const_int 0))]
+   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))]
   "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
-  [(set (match_dup 1) (match_dup 0))]
+  [(const_int 0)]
 {
   ix86_expand_clear (operands[0]);
+  ix86_last_zero_store_uid
+    = INSN_UID (emit_move_insn (operands[1], operands[0]));
+  DONE;
+})
+
+(define_peephole2
+  [(set (match_operand:SWI48 5 "memory_operand")
+	(match_operand:SWI48 0 "general_reg_operand"))
+   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
+   (set (match_operand:SWI48 2 "memory_operand") (const_int 0))
+   (set (match_operand:SWI48 3 "memory_operand") (const_int 0))
+   (set (match_operand:SWI48 4 "memory_operand") (const_int 0))]
+  "optimize_insn_for_size_p ()
+   && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
+  [(const_int 0)]
+{
+  emit_move_insn (operands[5], operands[0]);
+  emit_move_insn (operands[1], operands[0]);
+  emit_move_insn (operands[2], operands[0]);
+  emit_move_insn (operands[3], operands[0]);
+  ix86_last_zero_store_uid
+    = INSN_UID (emit_move_insn (operands[4], operands[0]));
+  DONE;
+})
+
+(define_peephole2
+  [(set (match_operand:SWI48 3 "memory_operand")
+	(match_operand:SWI48 0 "general_reg_operand"))
+   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
+   (set (match_operand:SWI48 2 "memory_operand") (const_int 0))]
+  "optimize_insn_for_size_p ()
+   && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
+  [(const_int 0)]
+{
+  emit_move_insn (operands[3], operands[0]);
+  emit_move_insn (operands[1], operands[0]);
+  ix86_last_zero_store_uid
+    = INSN_UID (emit_move_insn (operands[2], operands[0]));
+  DONE;
+})
+
+(define_peephole2
+  [(set (match_operand:SWI48 2 "memory_operand")
+	(match_operand:SWI48 0 "general_reg_operand"))
+   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))]
+  "optimize_insn_for_size_p ()
+   && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
+  [(const_int 0)]
+{
+  emit_move_insn (operands[2], operands[0]);
+  ix86_last_zero_store_uid
+    = INSN_UID (emit_move_insn (operands[1], operands[0]));
+  DONE;
 })
 
 ;; Reload dislikes loading constants directly into class_likely_spilled


	Jakub



More information about the Gcc-patches mailing list