This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Re: ia32: genrecog & peep2


> 
> Do you have any further concerns about the peephole2 generation?

As I mentioned in my first mail, I'm not convinced that the current way of
selecting a free register will work well for all imaginable sequences we may
want to put into peephole2 patterns.  The patch below tries to address this.

In a peephole2 pattern, the first element now contains a sequence of insns,
with match_scratch expressions, and match_dups referring to them, placed
between the insns.  For recognition purposes, the match_scratch and match_dup
elements are ignored; they only specify at which points in the original
sequence a free register must be available.  For
every scratch register you need, you place a match_scratch into the insn
sequence at the point where it must be available.  If it must remain
available over any of the following insns, put a match_dup of the scratch
after the last insn during which it must be available.

Here's a stupid example (which doesn't even use the scratches it allocates):

(define_peephole2
  [(match_scratch:SI 6 "r")
   (set (match_operand:SI 0 "register_operand" "")
        (match_operand:SI 1 "general_operand" ""))
   (match_scratch:SI 7 "r")
   (set (match_operand:SI 2 "register_operand" "")
        (match_operand:SI 3 "register_operand" ""))
   (match_dup 7)
   (match_scratch:SI 8 "r")
   (match_dup 6)
   (set (match_operand:SI 4 "register_operand" "")
        (match_operand:SI 5 "register_operand" ""))]
  "0"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))]
  "")


I don't particularly like this way of describing the required lifetime, but
it seems to do the job, at least for the majority of the cases I can think
of.

I've tried to test the patch, but the current newia32 branch appears to
trigger an unrelated abort in the C++ compiler when building libgcc.
However, "make check" does seem to work properly.

Note that I believe the change in recog.c is necessary to avoid blowing up
on CODE_LABELs.

Bernd

	* genemit.c (output_peephole2_scratch): Deleted.
	(output_peephole2_scratches): New function.
	(gen_split): Add ATTRIBUTE_UNUSED to curr_insn arg.
	For DEFINE_PEEPHOLE2, call output_peephole2_scratches.
	* genrecog.c (make_insn_sequence): Strip MATCH_SCRATCH and MATCH_DUP
	elements off our vector for DEFINE_PEEPHOLE2 patterns.
	* recog.c (recog_next_insn): Move some checks into the loop.
	* resource.c (find_free_register): New arg LAST_INSN.  If it is
	nonzero, make sure insns in the range CURRENT_INSN ... LAST_INSN
	do not clobber the register.
	* resource.h (find_free_register): Fix prototype to match definition.
	* i386.md (peephole2 patterns): Fix to match changes above.

Index: genemit.c
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/genemit.c,v
retrieving revision 1.30.2.3
diff -u -p -r1.30.2.3 genemit.c
--- genemit.c	1999/08/27 12:53:25	1.30.2.3
+++ genemit.c	1999/08/27 15:59:01
@@ -74,7 +74,7 @@ static void gen_split			PROTO((rtx));
 static void output_add_clobbers		PROTO((void));
 static void output_init_mov_optab	PROTO((void));
 static void gen_rtx_scratch		PROTO((rtx, enum rtx_code));
-static void output_peephole2_scratch	PROTO((rtx));
+static void output_peephole2_scratches	PROTO((rtx));
 
 
 static void
@@ -576,8 +576,9 @@ gen_split (split)
     {
       printf ("extern rtx gen_%s_%d PROTO ((rtx, rtx *));\n",
 	      name, insn_code_number);
-      printf ("rtx\ngen_%s_%d (curr_insn, operands)\n     rtx curr_insn;\n\
-    rtx *operands;\n", 
+      printf ("rtx\ngen_%s_%d (curr_insn, operands)\n\
+     rtx curr_insn ATTRIBUTE_UNUSED;\n\
+     rtx *operands;\n", 
 	      name, insn_code_number);
     }
   else
@@ -594,15 +595,7 @@ gen_split (split)
   printf ("  rtx _val = 0;\n");
 
   if (GET_CODE (split) == DEFINE_PEEPHOLE2)
-    {
-      printf ("  HARD_REG_SET _regs_allocated;\n");
-      printf ("  CLEAR_HARD_REG_SET (_regs_allocated);\n");
-
-      for (i = 0; i < XVECLEN (split, 2); i++)
-	{
-	  output_peephole2_scratch (XVECEXP (split, 2, i));
-	}
-    }
+    output_peephole2_scratches (split);
 
   printf ("  start_sequence ();\n");
 
@@ -703,39 +696,52 @@ output_add_clobbers ()
 }
 
 /* Generate code to invoke find_free_register () as needed for the
-   scratch registers used by the peephole2 pattern in INSN. */
+   scratch registers used by the peephole2 pattern in SPLIT. */
 
 static void
-output_peephole2_scratch (insn)
-     rtx insn;
+output_peephole2_scratches (split)
+     rtx split;
 {
-  RTX_CODE code = GET_CODE (insn);
+  int i;
+  int insn_nr = 0;
 
-  if (code == MATCH_SCRATCH)
+  printf ("  rtx first_insn ATTRIBUTE_UNUSED;\n");
+  printf ("  rtx last_insn ATTRIBUTE_UNUSED;\n");
+  printf ("  HARD_REG_SET _regs_allocated;\n");
+
+  printf ("  CLEAR_HARD_REG_SET (_regs_allocated);\n");
+
+  for (i = 0; i < XVECLEN (split, 0); i++)
     {
-      printf ("  if ((operands[%d] = find_free_register (curr_insn, \"%s\", %smode, &_regs_allocated)) == NULL_RTX)\n\
+      rtx elt = XVECEXP (split, 0, i);
+      if (GET_CODE (elt) == MATCH_SCRATCH)
+	{
+	  int last_insn_nr = insn_nr;
+	  int cur_insn_nr = insn_nr;
+	  int j;
+	  for (j = i + 1; j < XVECLEN (split, 0); j++)
+	    if (GET_CODE (XVECEXP (split, 0, j)) == MATCH_DUP)
+	      {
+		if (XINT (XVECEXP (split, 0, j), 0) == XINT (elt, 0))
+		  last_insn_nr = cur_insn_nr;
+	      }
+	    else if (GET_CODE (XVECEXP (split, 0, j)) != MATCH_SCRATCH)
+	      cur_insn_nr++;
+	  printf ("  first_insn = recog_next_insn (curr_insn, %d);\n", insn_nr);
+	  if (last_insn_nr > insn_nr)
+	    printf ("  last_insn = recog_next_insn (curr_insn, %d);\n",
+		    last_insn_nr - 1);
+	  else
+	    printf ("  last_insn = 0;\n");
+	  printf ("  if ((operands[%d] = find_free_register (first_insn, last_insn, \"%s\", %smode, &_regs_allocated)) == NULL_RTX)\n\
     return NULL;\n", 
-	      XINT (insn, 0),
-	      XSTR (insn, 1),
-	      GET_MODE_NAME (GET_MODE (insn)));
-    }
-  else
-    {
-      int i;
-      char *fmt = GET_RTX_FORMAT (code);
-      int len = GET_RTX_LENGTH (code);
+		  XINT (elt, 0),
+		  XSTR (elt, 1),
+		  GET_MODE_NAME (GET_MODE (elt)));
 
-      for (i = 0; i < len; i++)
-	{
-	  if (fmt[i] == 'e' || fmt[i] == 'u')
-	    output_peephole2_scratch (XEXP (insn, i));
-	  else if (fmt[i] == 'E')
-	    {
-	      int j;
-	      for (j = 0; j < XVECLEN (insn, i); j++)
-		output_peephole2_scratch (XVECEXP (insn, i, j));
-	    }
 	}
+      else if (GET_CODE (elt) != MATCH_DUP)
+	insn_nr++;
     }
 }
 
Index: genrecog.c
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/genrecog.c,v
retrieving revision 1.33.2.3
diff -u -p -r1.33.2.3 genrecog.c
--- genrecog.c	1999/08/27 10:10:07	1.33.2.3
+++ genrecog.c	1999/08/27 15:59:02
@@ -251,9 +251,29 @@ make_insn_sequence (insn, type)
     insn_name_ptr[next_insn_code] = name;
   }  
 
-  /* peephole2 always gets an outer parallel even if it's only one
-     entry. */
-  if (type != PEEPHOLE2 && XVECLEN (insn, type == RECOG) == 1)
+  if (type == PEEPHOLE2)
+    {
+      int i, j;
+
+      /* peephole2 gets special treatment:
+	 - X always gets an outer parallel even if it's only one entry
+	 - we remove all traces of outer-level match_scratch and match_dup
+           expressions here.  */
+      x = rtx_alloc (PARALLEL);
+      PUT_MODE (x, VOIDmode);
+      XVEC (x, 0) = rtvec_alloc (XVECLEN (insn, 0));
+      for (i = j = 0; i < XVECLEN (insn, 0); i++)
+	{
+	  rtx tmp = XVECEXP (insn, 0, i);
+	  if (GET_CODE (tmp) != MATCH_SCRATCH && GET_CODE (tmp) != MATCH_DUP)
+	    {
+	      XVECEXP (x, 0, j) = tmp;
+	      j++;
+	    }
+	}
+      XVECLEN (x, 0) = j;
+    }
+  else if (XVECLEN (insn, type == RECOG) == 1)
     x = XVECEXP (insn, type == RECOG, 0);
   else
     {
Index: recog.c
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/recog.c,v
retrieving revision 1.33.2.5
diff -u -p -r1.33.2.5 recog.c
--- recog.c	1999/08/27 10:10:08	1.33.2.5
+++ recog.c	1999/08/27 15:59:03
@@ -2703,13 +2703,15 @@ recog_next_insn (insn, n)
   while (insn != NULL_RTX && n > 0)
     {
       insn = next_nonnote_insn (insn);
+
+      if (insn == NULL_RTX)
+	return insn;
+
+      if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
+	return NULL_RTX;
+
       n--;
     }
-  if (insn == NULL_RTX)
-    return insn;
-
-  if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
-    return NULL_RTX;
 
   return insn;
 }
Index: resource.c
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/resource.c,v
retrieving revision 1.6.2.3
diff -u -p -r1.6.2.3 resource.c
--- resource.c	1999/08/25 23:31:03	1.6.2.3
+++ resource.c	1999/08/27 15:59:03
@@ -1239,17 +1239,20 @@ mark_end_of_function_resources (trial, i
 			     include_delayed_effects);
 }
 
-/* Try to find an available hard register of mode MODE at
-   CURRENT_INSN, matching the register class in CLASS_STR. Registers
-   that already have bits set in REG_SET will not be considered.
+/* Try to find a hard register of mode MODE, matching the register class in
+   CLASS_STR, which is available at the beginning of insn CURRENT_INSN and
+   remains available until the end of LAST_INSN.  LAST_INSN may be NULL_RTX,
+   in which case the only condition is that the register must be available
+   before CURRENT_INSN.
+   Registers that already have bits set in REG_SET will not be considered.
 
    If an appropriate register is available, it will be returned and the
    corresponding bit(s) in REG_SET will be set; otherwise, NULL_RTX is
    returned.  */
 
 rtx
-find_free_register (current_insn, class_str, mode, reg_set)
-     rtx current_insn;
+find_free_register (current_insn, last_insn, class_str, mode, reg_set)
+     rtx current_insn, last_insn;
      char *class_str;
      int mode;
      HARD_REG_SET *reg_set;
@@ -1261,6 +1264,14 @@ find_free_register (current_insn, class_
     = (clet == 'r' ? GENERAL_REGS :  REG_CLASS_FROM_LETTER (clet));
 
   mark_target_live_regs (get_insns (), current_insn, &used);
+  if (last_insn)
+    while (current_insn != last_insn)
+      {
+	/* Exclude anything set in this insn.  */
+	mark_set_resources (PATTERN (current_insn), &used, 0, 1);
+	current_insn = next_nonnote_insn (current_insn);
+      }
+
 
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     {
Index: resource.h
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/resource.h,v
retrieving revision 1.1
diff -u -p -r1.1 resource.h
--- resource.h	1999/02/02 21:22:48	1.1
+++ resource.h	1999/08/27 15:59:03
@@ -42,5 +42,5 @@ extern void incr_ticks_for_insn		PROTO((
 extern void mark_end_of_function_resources PROTO ((rtx, int));
 extern void init_resource_info		PROTO((rtx));
 extern void free_resource_info		PROTO((void));
-extern rtx find_free_register		PROTO((rtx, char *, int,
+extern rtx find_free_register		PROTO((rtx, rtx, char *, int,
 					       HARD_REG_SET *));
Index: config/i386/i386.md
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/config/i386/i386.md,v
retrieving revision 1.78.2.11
diff -u -p -r1.78.2.11 i386.md
--- i386.md	1999/07/20 05:36:42	1.78.2.11
+++ i386.md	1999/08/27 15:59:04
@@ -7757,98 +7757,108 @@
 
 ;; Don't push memory operands
 (define_peephole2
-  [(set (match_operand:SI 0 "push_operand" "")
+  [(match_scratch:SI 2 "r")
+   (set (match_operand:SI 0 "push_operand" "")
 	(match_operand:SI 1 "memory_operand" ""))]
   "! optimize_size && ! TARGET_PUSH_MEMORY"
-  [(set (match_scratch:SI 2 "r") (match_dup 1))
+  [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
   "")
 
 (define_peephole2
-  [(set (match_operand:HI 0 "push_operand" "")
+  [(match_scratch:HI 2 "r")
+   (set (match_operand:HI 0 "push_operand" "")
 	(match_operand:HI 1 "memory_operand" ""))]
   "! optimize_size && ! TARGET_PUSH_MEMORY"
-  [(set (match_scratch:HI 2 "r") (match_dup 1))
+  [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
   "")
 
 (define_peephole2
-  [(set (match_operand:QI 0 "push_operand" "")
+  [(match_scratch:QI 2 "q")
+   (set (match_operand:QI 0 "push_operand" "")
 	(match_operand:QI 1 "memory_operand" ""))]
   "! optimize_size && ! TARGET_PUSH_MEMORY"
-  [(set (match_scratch:QI 2 "q") (match_dup 1))
+  [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
   "")
 
 ;; Don't move an immediate directly to memory when the instruction
 ;; gets too big.
 (define_peephole2
-  [(set (match_operand:SI 0 "memory_operand" "")
+  [(match_scratch:SI 1 "r")
+   (set (match_operand:SI 0 "memory_operand" "")
         (const_int 0))]
   "! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn
    && reg_dead_p (insn, gen_rtx_REG (CCmode, FLAGS_REG))
    && TARGET_SPLIT_LONG_MOVES"
-  [(parallel [(set (match_scratch:SI 1 "r") (const_int 0))
+  [(parallel [(set (match_dup 1) (const_int 0))
 	      (clobber (reg:CC 17))])
    (set (match_dup 0) (match_dup 1))]
   "")
 
 (define_peephole2
-  [(set (match_operand:HI 0 "memory_operand" "")
+  [(match_scratch:HI 1 "r")
+   (set (match_operand:HI 0 "memory_operand" "")
         (const_int 0))]
   "! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn
    && reg_dead_p (insn, gen_rtx_REG (CCmode, FLAGS_REG))
    && TARGET_SPLIT_LONG_MOVES"
-  [(parallel [(set (match_scratch:HI 1 "r") (const_int 0))
+  [(parallel [(set (match_dup 1) (const_int 0))
 	      (clobber (reg:CC 17))])
    (set (match_dup 0) (match_dup 1))]
   "")
 
 (define_peephole2
-  [(set (match_operand:QI 0 "memory_operand" "")
+  [(match_scratch:QI 1 "q")
+   (set (match_operand:QI 0 "memory_operand" "")
         (const_int 0))]
   "! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn
    && reg_dead_p (insn, gen_rtx_REG (CCmode, FLAGS_REG))
    && TARGET_SPLIT_LONG_MOVES"
-  [(parallel [(set (match_scratch:QI 1 "q") (const_int 0))
+  [(parallel [(set (match_dup 1) (const_int 0))
 	      (clobber (reg:CC 17))])
    (set (match_dup 0) (match_dup 1))]
   "")
 
 (define_peephole2
-  [(set (match_operand:SI 0 "memory_operand" "")
+  [(match_scratch:SI 2 "r")
+   (set (match_operand:SI 0 "memory_operand" "")
         (match_operand:SI 1 "immediate_operand" ""))]
   "! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn
   && TARGET_SPLIT_LONG_MOVES"
-  [(set (match_scratch:SI 2 "r") (match_dup 1))
+  [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
   "")
 
 (define_peephole2
-  [(set (match_operand:HI 0 "memory_operand" "")
+  [(match_scratch:HI 2 "r")
+   (set (match_operand:HI 0 "memory_operand" "")
         (match_operand:HI 1 "immediate_operand" ""))]
   "! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn
   && TARGET_SPLIT_LONG_MOVES"
-  [(set (match_scratch:HI 2 "r") (match_dup 1))
+  [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
   "")
 
 (define_peephole2
-  [(set (match_operand:QI 0 "memory_operand" "")
+  [(match_scratch:QI 2 "q")
+   (set (match_operand:QI 0 "memory_operand" "")
         (match_operand:QI 1 "immediate_operand" ""))]
   "! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn
   && TARGET_SPLIT_LONG_MOVES"
-  [(set (match_scratch:QI 2 "q") (match_dup 1))
+  [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
   "")
 
 ;; Don't compare memory with zero, load and use a test instead.
 (define_peephole2
-  [(set (reg:CCNO 17)
+  [(match_scratch:SI 3 "r")
+   (set (reg:CCNO 17)
 	(compare:CCNO (match_operand:SI 0 "memory_operand" "")
 	(const_int 0)))]
   "! optimize_size"
-   [(set (match_scratch:SI 3 "r") (match_dup 0))
+   [(set (match_dup 3) (match_dup 0))
     (set (reg:CCNO 17) (compare:CCNO (match_dup 3) (const_int 0)))]
   "")
 
@@ -7999,26 +8009,28 @@
 
 ;; Don't do logical operations with memory inputs.
 (define_peephole2
-  [(parallel [(set (match_operand:SI 0 "register_operand" "")
+  [(match_scratch:SI 2 "r")
+   (parallel [(set (match_operand:SI 0 "register_operand" "")
                    (match_operator:SI 3 "arith_or_logical_operator"
                      [(match_dup 0)
                       (match_operand:SI 1 "memory_operand" "")]))
               (clobber (reg:CC 17))])]
   "! optimize_size && ! TARGET_READ_MODIFY"
-  [(set (match_scratch:SI 2 "r") (match_dup 1))
+  [(set (match_dup 2) (match_dup 1))
    (parallel [(set (match_dup 0)
                    (match_op_dup 3 [(match_dup 0) (match_dup 2)]))
               (clobber (reg:CC 17))])]
   "")
 
 (define_peephole2
-  [(parallel [(set (match_operand:SI 0 "register_operand" "")
+  [(match_scratch:SI 2 "r")
+   (parallel [(set (match_operand:SI 0 "register_operand" "")
                    (match_operator:SI 3 "arith_or_logical_operator"
                      [(match_operand:SI 1 "memory_operand" "")
                       (match_dup 0)]))
               (clobber (reg:CC 17))])]
   "! optimize_size && ! TARGET_READ_MODIFY"
-  [(set (match_scratch:SI 2 "r") (match_dup 1))
+  [(set (match_dup 2) (match_dup 1))
    (parallel [(set (match_dup 0)
                    (match_op_dup 3 [(match_dup 2) (match_dup 0)]))
               (clobber (reg:CC 17))])]
@@ -8031,13 +8043,14 @@
 ; the same decoder scheduling characteristics as the original.
 
 (define_peephole2
-  [(parallel [(set (match_operand:SI 0 "memory_operand" "")
+  [(match_scratch:SI 2 "r")
+   (parallel [(set (match_operand:SI 0 "memory_operand" "")
                    (match_operator:SI 3 "arith_or_logical_operator"
                      [(match_dup 0)
                       (match_operand:SI 1 "nonmemory_operand" "")]))
               (clobber (reg:CC 17))])]
   "! optimize_size && ! TARGET_READ_MODIFY_WRITE"
-  [(set (match_scratch:SI 2 "r") (match_dup 0))
+  [(set (match_dup 2) (match_dup 0))
    (parallel [(set (match_dup 2)
                    (match_op_dup 3 [(match_dup 2) (match_dup 1)]))
               (clobber (reg:CC 17))])
@@ -8045,13 +8058,14 @@
   "")
 
 (define_peephole2
-  [(parallel [(set (match_operand:SI 0 "memory_operand" "")
+  [(match_scratch:SI 2 "r")
+   (parallel [(set (match_operand:SI 0 "memory_operand" "")
                    (match_operator:SI 3 "arith_or_logical_operator"
                      [(match_operand:SI 1 "nonmemory_operand" "")
                       (match_dup 0)]))
               (clobber (reg:CC 17))])]
   "! optimize_size && ! TARGET_READ_MODIFY_WRITE"
-  [(set (match_scratch:SI 2 "r") (match_dup 0))
+  [(set (match_dup 2) (match_dup 0))
    (parallel [(set (match_dup 2)
                    (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
               (clobber (reg:CC 17))])


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]