This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: ia32: genrecog & peep2
- To: Richard Henderson <rth at cygnus dot com>
- Subject: Re: ia32: genrecog & peep2
- From: Bernd Schmidt <bernds at cygnus dot co dot uk>
- Date: Fri, 27 Aug 1999 17:05:23 +0100 (BST)
- cc: gcc-patches at gcc dot gnu dot org
>
> Do you have any further concerns about the peephole2 generation?
As I mentioned in my first mail, I'm not convinced that the current way of
selecting a free register will work well for all imaginable sequences we may
want to put into peephole2 patterns. The patch below tries to address this.
In a peephole2 pattern, the first element now contains a sequence of insns,
with match_scratches and match_dups of them in between. For recognition
purposes, the match_scratch/dup parts are ignored. They specify at which
points in the original sequence a free register must be available. For
every scratch register you need, you place a match_scratch into the insn
sequence at the point where it must be available. If it must remain
available over any of the following insns, put a match_dup of the scratch
after the last insn during which it must be available.
Here's a stupid example (which doesn't even use the scratches it allocates):
(define_peephole2
[(match_scratch:SI 6 "r")
(set (match_operand:SI 0 "register_operand" "")
(match_operand:SI 1 "general_operand" ""))
(match_scratch:SI 7 "r")
(set (match_operand:SI 2 "register_operand" "")
(match_operand:SI 3 "register_operand" ""))
(match_dup 7)
(match_scratch:SI 8 "r")
(match_dup 6)
(set (match_operand:SI 4 "register_operand" "")
(match_operand:SI 5 "register_operand" ""))]
"0"
[(set (match_dup 0) (match_dup 1))
(set (match_dup 2) (match_dup 3))
(set (match_dup 4) (match_dup 5))]
"")
I don't particularly like this way of describing the required lifetime, but
it seems to do the job, at least for the majority of the cases I can think
of.
I've tried to test the patch, but the current newia32 branch appears to
trigger an unrelated abort in the C++ compiler when building libgcc. However,
"make check" does seem to work properly.
Note that I believe the change in recog.c is necessary to avoid blowing up
on CODE_LABELs.
Bernd
* genemit.c (output_peephole2_scratch): Deleted.
(output_peephole2_scratches): New function.
(gen_split): Add ATTRIBUTE_UNUSED to curr_insn arg.
For DEFINE_PEEPHOLE2, call output_peephole2_scratches.
* genrecog.c (make_insn_sequence): Strip MATCH_SCRATCH and MATCH_DUP
elements off our vector for DEFINE_PEEPHOLE2 patterns.
* recog.c (recog_next_insn): Move some checks into the loop.
* resource.c (find_free_register): New arg LAST_INSN. If it is
nonzero, make sure insns in the range CURRENT_INSN ... LAST_INSN
do not clobber the register.
* resource.h (find_free_register): Fix prototype to match definition.
* i386.md (peephole2 patterns): Fix to match changes above.
Index: genemit.c
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/genemit.c,v
retrieving revision 1.30.2.3
diff -u -p -r1.30.2.3 genemit.c
--- genemit.c 1999/08/27 12:53:25 1.30.2.3
+++ genemit.c 1999/08/27 15:59:01
@@ -74,7 +74,7 @@ static void gen_split PROTO((rtx));
static void output_add_clobbers PROTO((void));
static void output_init_mov_optab PROTO((void));
static void gen_rtx_scratch PROTO((rtx, enum rtx_code));
-static void output_peephole2_scratch PROTO((rtx));
+static void output_peephole2_scratches PROTO((rtx));
static void
@@ -576,8 +576,9 @@ gen_split (split)
{
printf ("extern rtx gen_%s_%d PROTO ((rtx, rtx *));\n",
name, insn_code_number);
- printf ("rtx\ngen_%s_%d (curr_insn, operands)\n rtx curr_insn;\n\
- rtx *operands;\n",
+ printf ("rtx\ngen_%s_%d (curr_insn, operands)\n\
+ rtx curr_insn ATTRIBUTE_UNUSED;\n\
+ rtx *operands;\n",
name, insn_code_number);
}
else
@@ -594,15 +595,7 @@ gen_split (split)
printf (" rtx _val = 0;\n");
if (GET_CODE (split) == DEFINE_PEEPHOLE2)
- {
- printf (" HARD_REG_SET _regs_allocated;\n");
- printf (" CLEAR_HARD_REG_SET (_regs_allocated);\n");
-
- for (i = 0; i < XVECLEN (split, 2); i++)
- {
- output_peephole2_scratch (XVECEXP (split, 2, i));
- }
- }
+ output_peephole2_scratches (split);
printf (" start_sequence ();\n");
@@ -703,39 +696,52 @@ output_add_clobbers ()
}
/* Generate code to invoke find_free_register () as needed for the
- scratch registers used by the peephole2 pattern in INSN. */
+ scratch registers used by the peephole2 pattern in SPLIT. */
static void
-output_peephole2_scratch (insn)
- rtx insn;
+output_peephole2_scratches (split)
+ rtx split;
{
- RTX_CODE code = GET_CODE (insn);
+ int i;
+ int insn_nr = 0;
- if (code == MATCH_SCRATCH)
+ printf (" rtx first_insn ATTRIBUTE_UNUSED;\n");
+ printf (" rtx last_insn ATTRIBUTE_UNUSED;\n");
+ printf (" HARD_REG_SET _regs_allocated;\n");
+
+ printf (" CLEAR_HARD_REG_SET (_regs_allocated);\n");
+
+ for (i = 0; i < XVECLEN (split, 0); i++)
{
- printf (" if ((operands[%d] = find_free_register (curr_insn, \"%s\", %smode, &_regs_allocated)) == NULL_RTX)\n\
+ rtx elt = XVECEXP (split, 0, i);
+ if (GET_CODE (elt) == MATCH_SCRATCH)
+ {
+ int last_insn_nr = insn_nr;
+ int cur_insn_nr = insn_nr;
+ int j;
+ for (j = i + 1; j < XVECLEN (split, 0); j++)
+ if (GET_CODE (XVECEXP (split, 0, j)) == MATCH_DUP)
+ {
+ if (XINT (XVECEXP (split, 0, j), 0) == XINT (elt, 0))
+ last_insn_nr = cur_insn_nr;
+ }
+ else if (GET_CODE (XVECEXP (split, 0, j)) != MATCH_SCRATCH)
+ cur_insn_nr++;
+ printf (" first_insn = recog_next_insn (curr_insn, %d);\n", insn_nr);
+ if (last_insn_nr > insn_nr)
+ printf (" last_insn = recog_next_insn (curr_insn, %d);\n",
+ last_insn_nr - 1);
+ else
+ printf (" last_insn = 0;\n");
+ printf (" if ((operands[%d] = find_free_register (first_insn, last_insn, \"%s\", %smode, &_regs_allocated)) == NULL_RTX)\n\
return NULL;\n",
- XINT (insn, 0),
- XSTR (insn, 1),
- GET_MODE_NAME (GET_MODE (insn)));
- }
- else
- {
- int i;
- char *fmt = GET_RTX_FORMAT (code);
- int len = GET_RTX_LENGTH (code);
+ XINT (elt, 0),
+ XSTR (elt, 1),
+ GET_MODE_NAME (GET_MODE (elt)));
- for (i = 0; i < len; i++)
- {
- if (fmt[i] == 'e' || fmt[i] == 'u')
- output_peephole2_scratch (XEXP (insn, i));
- else if (fmt[i] == 'E')
- {
- int j;
- for (j = 0; j < XVECLEN (insn, i); j++)
- output_peephole2_scratch (XVECEXP (insn, i, j));
- }
}
+ else if (GET_CODE (elt) != MATCH_DUP)
+ insn_nr++;
}
}
Index: genrecog.c
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/genrecog.c,v
retrieving revision 1.33.2.3
diff -u -p -r1.33.2.3 genrecog.c
--- genrecog.c 1999/08/27 10:10:07 1.33.2.3
+++ genrecog.c 1999/08/27 15:59:02
@@ -251,9 +251,29 @@ make_insn_sequence (insn, type)
insn_name_ptr[next_insn_code] = name;
}
- /* peephole2 always gets an outer parallel even if it's only one
- entry. */
- if (type != PEEPHOLE2 && XVECLEN (insn, type == RECOG) == 1)
+ if (type == PEEPHOLE2)
+ {
+ int i, j;
+
+ /* peephole2 gets special treatment:
+ - X always gets an outer parallel even if it's only one entry
+ - we remove all traces of outer-level match_scratch and match_dup
+ expressions here. */
+ x = rtx_alloc (PARALLEL);
+ PUT_MODE (x, VOIDmode);
+ XVEC (x, 0) = rtvec_alloc (XVECLEN (insn, 0));
+ for (i = j = 0; i < XVECLEN (insn, 0); i++)
+ {
+ rtx tmp = XVECEXP (insn, 0, i);
+ if (GET_CODE (tmp) != MATCH_SCRATCH && GET_CODE (tmp) != MATCH_DUP)
+ {
+ XVECEXP (x, 0, j) = tmp;
+ j++;
+ }
+ }
+ XVECLEN (x, 0) = j;
+ }
+ else if (XVECLEN (insn, type == RECOG) == 1)
x = XVECEXP (insn, type == RECOG, 0);
else
{
Index: recog.c
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/recog.c,v
retrieving revision 1.33.2.5
diff -u -p -r1.33.2.5 recog.c
--- recog.c 1999/08/27 10:10:08 1.33.2.5
+++ recog.c 1999/08/27 15:59:03
@@ -2703,13 +2703,15 @@ recog_next_insn (insn, n)
while (insn != NULL_RTX && n > 0)
{
insn = next_nonnote_insn (insn);
+
+ if (insn == NULL_RTX)
+ return insn;
+
+ if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
+ return NULL_RTX;
+
n--;
}
- if (insn == NULL_RTX)
- return insn;
-
- if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
- return NULL_RTX;
return insn;
}
Index: resource.c
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/resource.c,v
retrieving revision 1.6.2.3
diff -u -p -r1.6.2.3 resource.c
--- resource.c 1999/08/25 23:31:03 1.6.2.3
+++ resource.c 1999/08/27 15:59:03
@@ -1239,17 +1239,20 @@ mark_end_of_function_resources (trial, i
include_delayed_effects);
}
-/* Try to find an available hard register of mode MODE at
- CURRENT_INSN, matching the register class in CLASS_STR. Registers
- that already have bits set in REG_SET will not be considered.
+/* Try to find a hard register of mode MODE, matching the register class in
+ CLASS_STR, which is available at the beginning of insn CURRENT_INSN and
+ remains available until the end of LAST_INSN. LAST_INSN may be NULL_RTX,
+ in which case the only condition is that the register must be available
+ before CURRENT_INSN.
+ Registers that already have bits set in REG_SET will not be considered.
If an appropriate register is available, it will be returned and the
corresponding bit(s) in REG_SET will be set; otherwise, NULL_RTX is
returned. */
rtx
-find_free_register (current_insn, class_str, mode, reg_set)
- rtx current_insn;
+find_free_register (current_insn, last_insn, class_str, mode, reg_set)
+ rtx current_insn, last_insn;
char *class_str;
int mode;
HARD_REG_SET *reg_set;
@@ -1261,6 +1264,14 @@ find_free_register (current_insn, class_
= (clet == 'r' ? GENERAL_REGS : REG_CLASS_FROM_LETTER (clet));
mark_target_live_regs (get_insns (), current_insn, &used);
+ if (last_insn)
+ while (current_insn != last_insn)
+ {
+ /* Exclude anything set in this insn. */
+ mark_set_resources (PATTERN (current_insn), &used, 0, 1);
+ current_insn = next_nonnote_insn (current_insn);
+ }
+
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
{
Index: resource.h
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/resource.h,v
retrieving revision 1.1
diff -u -p -r1.1 resource.h
--- resource.h 1999/02/02 21:22:48 1.1
+++ resource.h 1999/08/27 15:59:03
@@ -42,5 +42,5 @@ extern void incr_ticks_for_insn PROTO((
extern void mark_end_of_function_resources PROTO ((rtx, int));
extern void init_resource_info PROTO((rtx));
extern void free_resource_info PROTO((void));
-extern rtx find_free_register PROTO((rtx, char *, int,
+extern rtx find_free_register PROTO((rtx, rtx, char *, int,
HARD_REG_SET *));
Index: config/i386/i386.md
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/config/i386/i386.md,v
retrieving revision 1.78.2.11
diff -u -p -r1.78.2.11 i386.md
--- i386.md 1999/07/20 05:36:42 1.78.2.11
+++ i386.md 1999/08/27 15:59:04
@@ -7757,98 +7757,108 @@
;; Don't push memory operands
(define_peephole2
- [(set (match_operand:SI 0 "push_operand" "")
+ [(match_scratch:SI 2 "r")
+ (set (match_operand:SI 0 "push_operand" "")
(match_operand:SI 1 "memory_operand" ""))]
"! optimize_size && ! TARGET_PUSH_MEMORY"
- [(set (match_scratch:SI 2 "r") (match_dup 1))
+ [(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (match_dup 2))]
"")
(define_peephole2
- [(set (match_operand:HI 0 "push_operand" "")
+ [(match_scratch:HI 2 "r")
+ (set (match_operand:HI 0 "push_operand" "")
(match_operand:HI 1 "memory_operand" ""))]
"! optimize_size && ! TARGET_PUSH_MEMORY"
- [(set (match_scratch:HI 2 "r") (match_dup 1))
+ [(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (match_dup 2))]
"")
(define_peephole2
- [(set (match_operand:QI 0 "push_operand" "")
+ [(match_scratch:QI 2 "q")
+ (set (match_operand:QI 0 "push_operand" "")
(match_operand:QI 1 "memory_operand" ""))]
"! optimize_size && ! TARGET_PUSH_MEMORY"
- [(set (match_scratch:QI 2 "q") (match_dup 1))
+ [(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (match_dup 2))]
"")
;; Don't move an immediate directly to memory when the instruction
;; gets too big.
(define_peephole2
- [(set (match_operand:SI 0 "memory_operand" "")
+ [(match_scratch:SI 1 "r")
+ (set (match_operand:SI 0 "memory_operand" "")
(const_int 0))]
"! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn
&& reg_dead_p (insn, gen_rtx_REG (CCmode, FLAGS_REG))
&& TARGET_SPLIT_LONG_MOVES"
- [(parallel [(set (match_scratch:SI 1 "r") (const_int 0))
+ [(parallel [(set (match_dup 1) (const_int 0))
(clobber (reg:CC 17))])
(set (match_dup 0) (match_dup 1))]
"")
(define_peephole2
- [(set (match_operand:HI 0 "memory_operand" "")
+ [(match_scratch:HI 1 "r")
+ (set (match_operand:HI 0 "memory_operand" "")
(const_int 0))]
"! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn
&& reg_dead_p (insn, gen_rtx_REG (CCmode, FLAGS_REG))
&& TARGET_SPLIT_LONG_MOVES"
- [(parallel [(set (match_scratch:HI 1 "r") (const_int 0))
+ [(parallel [(set (match_dup 1) (const_int 0))
(clobber (reg:CC 17))])
(set (match_dup 0) (match_dup 1))]
"")
(define_peephole2
- [(set (match_operand:QI 0 "memory_operand" "")
+ [(match_scratch:QI 1 "q")
+ (set (match_operand:QI 0 "memory_operand" "")
(const_int 0))]
"! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn
&& reg_dead_p (insn, gen_rtx_REG (CCmode, FLAGS_REG))
&& TARGET_SPLIT_LONG_MOVES"
- [(parallel [(set (match_scratch:QI 1 "q") (const_int 0))
+ [(parallel [(set (match_dup 1) (const_int 0))
(clobber (reg:CC 17))])
(set (match_dup 0) (match_dup 1))]
"")
(define_peephole2
- [(set (match_operand:SI 0 "memory_operand" "")
+ [(match_scratch:SI 2 "r")
+ (set (match_operand:SI 0 "memory_operand" "")
(match_operand:SI 1 "immediate_operand" ""))]
"! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn
&& TARGET_SPLIT_LONG_MOVES"
- [(set (match_scratch:SI 2 "r") (match_dup 1))
+ [(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (match_dup 2))]
"")
(define_peephole2
- [(set (match_operand:HI 0 "memory_operand" "")
+ [(match_scratch:HI 2 "r")
+ (set (match_operand:HI 0 "memory_operand" "")
(match_operand:HI 1 "immediate_operand" ""))]
"! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn
&& TARGET_SPLIT_LONG_MOVES"
- [(set (match_scratch:HI 2 "r") (match_dup 1))
+ [(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (match_dup 2))]
"")
(define_peephole2
- [(set (match_operand:QI 0 "memory_operand" "")
+ [(match_scratch:QI 2 "q")
+ (set (match_operand:QI 0 "memory_operand" "")
(match_operand:QI 1 "immediate_operand" ""))]
"! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn
&& TARGET_SPLIT_LONG_MOVES"
- [(set (match_scratch:QI 2 "q") (match_dup 1))
+ [(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (match_dup 2))]
"")
;; Don't compare memory with zero, load and use a test instead.
(define_peephole2
- [(set (reg:CCNO 17)
+ [(match_scratch:SI 3 "r")
+ (set (reg:CCNO 17)
(compare:CCNO (match_operand:SI 0 "memory_operand" "")
(const_int 0)))]
"! optimize_size"
- [(set (match_scratch:SI 3 "r") (match_dup 0))
+ [(set (match_dup 3) (match_dup 0))
(set (reg:CCNO 17) (compare:CCNO (match_dup 3) (const_int 0)))]
"")
@@ -7999,26 +8009,28 @@
;; Don't do logical operations with memory inputs.
(define_peephole2
- [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ [(match_scratch:SI 2 "r")
+ (parallel [(set (match_operand:SI 0 "register_operand" "")
(match_operator:SI 3 "arith_or_logical_operator"
[(match_dup 0)
(match_operand:SI 1 "memory_operand" "")]))
(clobber (reg:CC 17))])]
"! optimize_size && ! TARGET_READ_MODIFY"
- [(set (match_scratch:SI 2 "r") (match_dup 1))
+ [(set (match_dup 2) (match_dup 1))
(parallel [(set (match_dup 0)
(match_op_dup 3 [(match_dup 0) (match_dup 2)]))
(clobber (reg:CC 17))])]
"")
(define_peephole2
- [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ [(match_scratch:SI 2 "r")
+ (parallel [(set (match_operand:SI 0 "register_operand" "")
(match_operator:SI 3 "arith_or_logical_operator"
[(match_operand:SI 1 "memory_operand" "")
(match_dup 0)]))
(clobber (reg:CC 17))])]
"! optimize_size && ! TARGET_READ_MODIFY"
- [(set (match_scratch:SI 2 "r") (match_dup 1))
+ [(set (match_dup 2) (match_dup 1))
(parallel [(set (match_dup 0)
(match_op_dup 3 [(match_dup 2) (match_dup 0)]))
(clobber (reg:CC 17))])]
@@ -8031,13 +8043,14 @@
; the same decoder scheduling characteristics as the original.
(define_peephole2
- [(parallel [(set (match_operand:SI 0 "memory_operand" "")
+ [(match_scratch:SI 2 "r")
+ (parallel [(set (match_operand:SI 0 "memory_operand" "")
(match_operator:SI 3 "arith_or_logical_operator"
[(match_dup 0)
(match_operand:SI 1 "nonmemory_operand" "")]))
(clobber (reg:CC 17))])]
"! optimize_size && ! TARGET_READ_MODIFY_WRITE"
- [(set (match_scratch:SI 2 "r") (match_dup 0))
+ [(set (match_dup 2) (match_dup 0))
(parallel [(set (match_dup 2)
(match_op_dup 3 [(match_dup 2) (match_dup 1)]))
(clobber (reg:CC 17))])
@@ -8045,13 +8058,14 @@
"")
(define_peephole2
- [(parallel [(set (match_operand:SI 0 "memory_operand" "")
+ [(match_scratch:SI 2 "r")
+ (parallel [(set (match_operand:SI 0 "memory_operand" "")
(match_operator:SI 3 "arith_or_logical_operator"
[(match_operand:SI 1 "nonmemory_operand" "")
(match_dup 0)]))
(clobber (reg:CC 17))])]
"! optimize_size && ! TARGET_READ_MODIFY_WRITE"
- [(set (match_scratch:SI 2 "r") (match_dup 0))
+ [(set (match_dup 2) (match_dup 0))
(parallel [(set (match_dup 2)
(match_op_dup 3 [(match_dup 1) (match_dup 2)]))
(clobber (reg:CC 17))])