PR target/40457
* config/arm/arm.h (MAX_LDM_STM_OPS): New macro.
(arm_regs_in_sequence): Declare.
* config/arm/arm-protos.h (emit_ldm_seq, emit_stm_seq,
load_multiple_sequence, store_multiple_sequence): Delete
declarations.
(arm_gen_load_multiple, arm_gen_store_multiple): Adjust
prototypes.
* config/arm/arm.c (arm_regs_in_sequence): New array.
(compute_offset_order): New static function.
(multiple_operation_profitable_p): New static function.
(load_multiple_sequence): Now static. New args SAVED_ORDER,
CHECK_REGS. All callers changed.
Replace constant 4 with MAX_LDM_STM_OPS throughout.
Use compute_offset_order. If SAVED_ORDER is nonnull, copy the computed
order into it. If CHECK_REGS is false, don't sort REGS.
Handle Thumb mode. Use multiple_operation_profitable_p.
(store_multiple_sequence): Now static. New args NOPS_TOTAL,
SAVED_ORDER, REG_RTXS and CHECK_REGS. All callers changed.
Replace constant 4 with MAX_LDM_STM_OPS throughout.
Use compute_offset_order. If SAVED_ORDER is nonnull, copy the computed
order into it. If CHECK_REGS is false, don't sort REGS. Set up
REG_RTXS just like REGS.
Handle Thumb mode. Use multiple_operation_profitable_p.
(arm_gen_load_multiple_1): New function, broken out of
arm_gen_load_multiple.
(arm_gen_store_multiple_1): New function, broken out of
arm_gen_store_multiple.
(arm_gen_multiple_op): New function, with code from
arm_gen_load_multiple and arm_gen_store_multiple moved here.
(arm_gen_load_multiple, arm_gen_store_multiple): Now just
wrappers around arm_gen_multiple_op. Remove argument UP, all callers
changed.
(gen_ldm_seq, gen_stm_seq, gen_const_stm_seq): New functions.
* config/arm/predicates.md (commutative_binary_operator): New.
(load_multiple_operation, store_multiple_operation): Handle more
variants of these patterns with different starting offsets. Handle
Thumb-1.
* config/arm/arm.md: Include "ldmstm.md".
(ldmsi_postinc4, ldmsi_postinc4_thumb1, ldmsi_postinc3, ldmsi_postinc2,
ldmsi4, ldmsi3, ldmsi2, stmsi_postinc4, stmsi_postinc4_thumb1,
stmsi_postinc3, stmsi_postinc2, stmsi4, stmsi3, stmsi2 and related
peepholes): Delete.
* config/arm/ldmstm.md: New file.
* config/arm/arm-ldmstm.ml: New file.
* recog.c (peep2_do_rebuild_jump_labels, peep2_do_cleanup_cfg): New
static variables.
(peep2_buf_position): New static function.
(peep2_regno_dead_p, peep2_reg_dead_p, peep2_find_free_register,
peephole2_optimize): Use it.
(peep2_attempt, peep2_update_life): New static functions, broken out
of peephole2_optimize.
(peep2_fill_buffer): New static function.
(peephole2_optimize): Change the main loop to try to fill the buffer
with the maximum number of insns before matching them against
peepholes. Use a forward scan. Remove special case for targets with
conditional execution.
* config/i386/i386.md (peephole2 for arithmetic ops with memory):
Rewrite so as not to expect the second insn to have had a peephole
applied yet.
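
Not part of the patch, but to illustrate the end result: the new
peepholes allow a run of adjacent word loads such as

	ldr r1, [r0]
	ldr r2, [r0, #4]
	ldr r3, [r0, #8]
	ldr r4, [r0, #12]

to be combined into a single

	ldmia r0, {r1, r2, r3, r4}

provided the destination registers are ascending, or may be reordered
into ascending order because their only use is in a commutative
operation (register names above are hypothetical).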
Index: recog.c
===================================================================
--- recog.c (revision 158639)
+++ recog.c (working copy)
@@ -2911,6 +2911,10 @@ struct peep2_insn_data
static struct peep2_insn_data peep2_insn_data[MAX_INSNS_PER_PEEP2 + 1];
static int peep2_current;
+
+static bool peep2_do_rebuild_jump_labels;
+static bool peep2_do_cleanup_cfg;
+
/* The number of instructions available to match a peep2. */
int peep2_current_count;
@@ -2919,6 +2923,16 @@ int peep2_current_count;
DF_LIVE_OUT for the block. */
#define PEEP2_EOB pc_rtx
+/* Wrap N to fit into the peep2_insn_data buffer. */
+
+static int
+peep2_buf_position (int n)
+{
+ if (n >= MAX_INSNS_PER_PEEP2 + 1)
+ n -= MAX_INSNS_PER_PEEP2 + 1;
+ return n;
+}
+
/* Return the Nth non-note insn after `current', or return NULL_RTX if it
does not exist. Used by the recognizer to find the next insn to match
in a multi-insn pattern. */
@@ -2928,9 +2942,7 @@ peep2_next_insn (int n)
{
gcc_assert (n <= peep2_current_count);
- n += peep2_current;
- if (n >= MAX_INSNS_PER_PEEP2 + 1)
- n -= MAX_INSNS_PER_PEEP2 + 1;
+ n = peep2_buf_position (peep2_current + n);
return peep2_insn_data[n].insn;
}
@@ -2943,9 +2955,7 @@ peep2_regno_dead_p (int ofs, int regno)
{
gcc_assert (ofs < MAX_INSNS_PER_PEEP2 + 1);
- ofs += peep2_current;
- if (ofs >= MAX_INSNS_PER_PEEP2 + 1)
- ofs -= MAX_INSNS_PER_PEEP2 + 1;
+ ofs = peep2_buf_position (peep2_current + ofs);
gcc_assert (peep2_insn_data[ofs].insn != NULL_RTX);
@@ -2961,9 +2971,7 @@ peep2_reg_dead_p (int ofs, rtx reg)
gcc_assert (ofs < MAX_INSNS_PER_PEEP2 + 1);
- ofs += peep2_current;
- if (ofs >= MAX_INSNS_PER_PEEP2 + 1)
- ofs -= MAX_INSNS_PER_PEEP2 + 1;
+ ofs = peep2_buf_position (peep2_current + ofs);
gcc_assert (peep2_insn_data[ofs].insn != NULL_RTX);
@@ -2998,12 +3006,8 @@ peep2_find_free_register (int from, int
gcc_assert (from < MAX_INSNS_PER_PEEP2 + 1);
gcc_assert (to < MAX_INSNS_PER_PEEP2 + 1);
- from += peep2_current;
- if (from >= MAX_INSNS_PER_PEEP2 + 1)
- from -= MAX_INSNS_PER_PEEP2 + 1;
- to += peep2_current;
- if (to >= MAX_INSNS_PER_PEEP2 + 1)
- to -= MAX_INSNS_PER_PEEP2 + 1;
+ from = peep2_buf_position (peep2_current + from);
+ to = peep2_buf_position (peep2_current + to);
gcc_assert (peep2_insn_data[from].insn != NULL_RTX);
REG_SET_TO_HARD_REG_SET (live, peep2_insn_data[from].live_before);
@@ -3012,8 +3016,7 @@ peep2_find_free_register (int from, int
{
HARD_REG_SET this_live;
- if (++from >= MAX_INSNS_PER_PEEP2 + 1)
- from = 0;
+ from = peep2_buf_position (from + 1);
gcc_assert (peep2_insn_data[from].insn != NULL_RTX);
REG_SET_TO_HARD_REG_SET (this_live, peep2_insn_data[from].live_before);
IOR_HARD_REG_SET (live, this_live);
@@ -3106,236 +3109,297 @@ peep2_reinit_state (regset live)
COPY_REG_SET (peep2_insn_data[MAX_INSNS_PER_PEEP2].live_before, live);
}
-/* Perform the peephole2 optimization pass. */
+/* While scanning basic block BB, we found a match of length MATCH_LEN,
+ starting at INSN. Perform the replacement, removing the old insns and
+ replacing them with ATTEMPT. Returns the last insn emitted. */
-static void
-peephole2_optimize (void)
+static rtx
+peep2_attempt (basic_block bb, rtx insn, int match_len, rtx attempt)
{
- rtx insn, prev;
- bitmap live;
int i;
- basic_block bb;
- bool do_cleanup_cfg = false;
- bool do_rebuild_jump_labels = false;
+ rtx last, note, before_try, x;
+ bool was_call = false;
- df_set_flags (DF_LR_RUN_DCE);
- df_analyze ();
+ /* If we are splitting a CALL_INSN, look for the CALL_INSN
+ in SEQ and copy our CALL_INSN_FUNCTION_USAGE and other
+ cfg-related call notes. */
+ for (i = 0; i <= match_len; ++i)
+ {
+ int j;
+ rtx old_insn, new_insn, note;
- /* Initialize the regsets we're going to use. */
- for (i = 0; i < MAX_INSNS_PER_PEEP2 + 1; ++i)
- peep2_insn_data[i].live_before = BITMAP_ALLOC (&reg_obstack);
- live = BITMAP_ALLOC (&reg_obstack);
+ j = peep2_buf_position (peep2_current + i);
+ old_insn = peep2_insn_data[j].insn;
+ if (!CALL_P (old_insn))
+ continue;
+ was_call = true;
- FOR_EACH_BB_REVERSE (bb)
- {
- rtl_profile_for_bb (bb);
+ new_insn = attempt;
+ while (new_insn != NULL_RTX)
+ {
+ if (CALL_P (new_insn))
+ break;
+ new_insn = NEXT_INSN (new_insn);
+ }
- /* Start up propagation. */
- bitmap_copy (live, DF_LR_OUT (bb));
- df_simulate_initialize_backwards (bb, live);
- peep2_reinit_state (live);
+ gcc_assert (new_insn != NULL_RTX);
- for (insn = BB_END (bb); ; insn = prev)
+ CALL_INSN_FUNCTION_USAGE (new_insn)
+ = CALL_INSN_FUNCTION_USAGE (old_insn);
+
+ for (note = REG_NOTES (old_insn);
+ note;
+ note = XEXP (note, 1))
+ switch (REG_NOTE_KIND (note))
+ {
+ case REG_NORETURN:
+ case REG_SETJMP:
+ add_reg_note (new_insn, REG_NOTE_KIND (note),
+ XEXP (note, 0));
+ break;
+ default:
+ /* Discard all other reg notes. */
+ break;
+ }
+
+ /* Croak if there is another call in the sequence. */
+ while (++i <= match_len)
{
- prev = PREV_INSN (insn);
- if (NONDEBUG_INSN_P (insn))
+ j = peep2_buf_position (peep2_current + i);
+ old_insn = peep2_insn_data[j].insn;
+ gcc_assert (!CALL_P (old_insn));
+ }
+ break;
+ }
+
+ i = peep2_buf_position (peep2_current + match_len);
+
+ note = find_reg_note (peep2_insn_data[i].insn, REG_EH_REGION, NULL_RTX);
+
+ /* Replace the old sequence with the new. */
+ last = emit_insn_after_setloc (attempt,
+ peep2_insn_data[i].insn,
+ INSN_LOCATOR (peep2_insn_data[i].insn));
+ before_try = PREV_INSN (insn);
+ delete_insn_chain (insn, peep2_insn_data[i].insn, false);
+
+ /* Re-insert the EH_REGION notes. */
+ if (note || (was_call && nonlocal_goto_handler_labels))
+ {
+ edge eh_edge;
+ edge_iterator ei;
+
+ FOR_EACH_EDGE (eh_edge, ei, bb->succs)
+ if (eh_edge->flags & (EDGE_EH | EDGE_ABNORMAL_CALL))
+ break;
+
+ if (note)
+ copy_reg_eh_region_note_backward (note, last, before_try);
+
+ if (eh_edge)
+ for (x = last; x != before_try; x = PREV_INSN (x))
+ if (x != BB_END (bb)
+ && (can_throw_internal (x)
+ || can_nonlocal_goto (x)))
{
- rtx attempt, before_try, x;
- int match_len;
- rtx note;
- bool was_call = false;
+ edge nfte, nehe;
+ int flags;
- /* Record this insn. */
- if (--peep2_current < 0)
- peep2_current = MAX_INSNS_PER_PEEP2;
- if (peep2_current_count < MAX_INSNS_PER_PEEP2
- && peep2_insn_data[peep2_current].insn == NULL_RTX)
- peep2_current_count++;
- peep2_insn_data[peep2_current].insn = insn;
- df_simulate_one_insn_backwards (bb, insn, live);
- COPY_REG_SET (peep2_insn_data[peep2_current].live_before, live);
+ nfte = split_block (bb, x);
+ flags = (eh_edge->flags
+ & (EDGE_EH | EDGE_ABNORMAL));
+ if (CALL_P (x))
+ flags |= EDGE_ABNORMAL_CALL;
+ nehe = make_edge (nfte->src, eh_edge->dest,
+ flags);
- if (RTX_FRAME_RELATED_P (insn))
- {
- /* If an insn has RTX_FRAME_RELATED_P set, peephole
- substitution would lose the
- REG_FRAME_RELATED_EXPR that is attached. */
- peep2_reinit_state (live);
- attempt = NULL;
- }
- else
- /* Match the peephole. */
- attempt = peephole2_insns (PATTERN (insn), insn, &match_len);
+ nehe->probability = eh_edge->probability;
+ nfte->probability
+ = REG_BR_PROB_BASE - nehe->probability;
- if (attempt != NULL)
- {
- /* If we are splitting a CALL_INSN, look for the CALL_INSN
- in SEQ and copy our CALL_INSN_FUNCTION_USAGE and other
- cfg-related call notes. */
- for (i = 0; i <= match_len; ++i)
- {
- int j;
- rtx old_insn, new_insn, note;
+ peep2_do_cleanup_cfg |= purge_dead_edges (nfte->dest);
+ bb = nfte->src;
+ eh_edge = nehe;
+ }
- j = i + peep2_current;
- if (j >= MAX_INSNS_PER_PEEP2 + 1)
- j -= MAX_INSNS_PER_PEEP2 + 1;
- old_insn = peep2_insn_data[j].insn;
- if (!CALL_P (old_insn))
- continue;
- was_call = true;
+ /* Converting possibly trapping insn to non-trapping is
+ possible. Zap dummy outgoing edges. */
+ peep2_do_cleanup_cfg |= purge_dead_edges (bb);
+ }
- new_insn = attempt;
- while (new_insn != NULL_RTX)
- {
- if (CALL_P (new_insn))
- break;
- new_insn = NEXT_INSN (new_insn);
- }
+ /* If we generated a jump instruction, it won't have
+ JUMP_LABEL set. Recompute after we're done. */
+ for (x = last; x != before_try; x = PREV_INSN (x))
+ if (JUMP_P (x))
+ {
+ peep2_do_rebuild_jump_labels = true;
+ break;
+ }
- gcc_assert (new_insn != NULL_RTX);
+ return last;
+}
- CALL_INSN_FUNCTION_USAGE (new_insn)
- = CALL_INSN_FUNCTION_USAGE (old_insn);
+/* After performing a replacement in basic block BB, fix up the life
+ information in our buffer. LAST is the last of the insns that we
+ emitted as a replacement. PREV is the insn before the start of
+ the replacement. MATCH_LEN is the number of instructions that were
+ matched, and which now need to be replaced in the buffer. */
- for (note = REG_NOTES (old_insn);
- note;
- note = XEXP (note, 1))
- switch (REG_NOTE_KIND (note))
- {
- case REG_NORETURN:
- case REG_SETJMP:
- add_reg_note (new_insn, REG_NOTE_KIND (note),
- XEXP (note, 0));
- break;
- default:
- /* Discard all other reg notes. */
- break;
- }
+static void
+peep2_update_life (basic_block bb, int match_len, rtx last, rtx prev)
+{
+ int i = peep2_buf_position (peep2_current + match_len + 1);
+ rtx x;
+ regset_head live;
- /* Croak if there is another call in the sequence. */
- while (++i <= match_len)
- {
- j = i + peep2_current;
- if (j >= MAX_INSNS_PER_PEEP2 + 1)
- j -= MAX_INSNS_PER_PEEP2 + 1;
- old_insn = peep2_insn_data[j].insn;
- gcc_assert (!CALL_P (old_insn));
- }
- break;
- }
+ INIT_REG_SET (&live);
+ COPY_REG_SET (&live, peep2_insn_data[i].live_before);
- i = match_len + peep2_current;
- if (i >= MAX_INSNS_PER_PEEP2 + 1)
- i -= MAX_INSNS_PER_PEEP2 + 1;
+ gcc_assert (peep2_current_count >= match_len + 1);
+ peep2_current_count -= match_len + 1;
- note = find_reg_note (peep2_insn_data[i].insn,
- REG_EH_REGION, NULL_RTX);
+ x = last;
+ do
+ {
+ if (INSN_P (x))
+ {
+ df_insn_rescan (x);
+ if (peep2_current_count < MAX_INSNS_PER_PEEP2)
+ {
+ peep2_current_count++;
+ if (--i < 0)
+ i = MAX_INSNS_PER_PEEP2;
+ peep2_insn_data[i].insn = x;
+ df_simulate_one_insn_backwards (bb, x, &live);
+ COPY_REG_SET (peep2_insn_data[i].live_before, &live);
+ }
+ }
+ x = PREV_INSN (x);
+ }
+ while (x != prev);
+ CLEAR_REG_SET (&live);
- /* Replace the old sequence with the new. */
- attempt = emit_insn_after_setloc (attempt,
- peep2_insn_data[i].insn,
- INSN_LOCATOR (peep2_insn_data[i].insn));
- before_try = PREV_INSN (insn);
- delete_insn_chain (insn, peep2_insn_data[i].insn, false);
+ peep2_current = i;
+}
- /* Re-insert the EH_REGION notes. */
- if (note || (was_call && nonlocal_goto_handler_labels))
- {
- edge eh_edge;
- edge_iterator ei;
+/* Add INSN, which is in BB, at the end of the peep2 insn buffer if possible.
+ Return true if we added it, false otherwise. */
- FOR_EACH_EDGE (eh_edge, ei, bb->succs)
- if (eh_edge->flags & (EDGE_EH | EDGE_ABNORMAL_CALL))
- break;
+static bool
+peep2_fill_buffer (basic_block bb, rtx insn, regset live)
+{
+ int pos;
- if (note)
- copy_reg_eh_region_note_backward (note, attempt,
- before_try);
+ if (peep2_current_count == MAX_INSNS_PER_PEEP2)
+ return false;
- if (eh_edge)
- for (x = attempt ; x != before_try ; x = PREV_INSN (x))
- if (x != BB_END (bb)
- && (can_throw_internal (x)
- || can_nonlocal_goto (x)))
- {
- edge nfte, nehe;
- int flags;
+ /* If an insn has RTX_FRAME_RELATED_P set, peephole substitution would lose
+ the REG_FRAME_RELATED_EXPR that is attached. */
+ if (RTX_FRAME_RELATED_P (insn))
+ {
+ /* Let the buffer drain first. */
+ if (peep2_current_count > 0)
+ return false;
+ df_simulate_one_insn_forwards (bb, insn, live);
+ return true;
+ }
- nfte = split_block (bb, x);
- flags = (eh_edge->flags
- & (EDGE_EH | EDGE_ABNORMAL));
- if (CALL_P (x))
- flags |= EDGE_ABNORMAL_CALL;
- nehe = make_edge (nfte->src, eh_edge->dest,
- flags);
+ pos = peep2_buf_position (peep2_current + peep2_current_count);
+ peep2_insn_data[pos].insn = insn;
+ COPY_REG_SET (peep2_insn_data[pos].live_before, live);
+ peep2_current_count++;
- nehe->probability = eh_edge->probability;
- nfte->probability
- = REG_BR_PROB_BASE - nehe->probability;
+ df_simulate_one_insn_forwards (bb, insn, live);
+ return true;
+}
- do_cleanup_cfg |= purge_dead_edges (nfte->dest);
- bb = nfte->src;
- eh_edge = nehe;
- }
+/* Perform the peephole2 optimization pass. */
- /* Converting possibly trapping insn to non-trapping is
- possible. Zap dummy outgoing edges. */
- do_cleanup_cfg |= purge_dead_edges (bb);
- }
+static void
+peephole2_optimize (void)
+{
+ rtx insn;
+ bitmap live;
+ int i;
+ basic_block bb;
- if (targetm.have_conditional_execution ())
- {
- for (i = 0; i < MAX_INSNS_PER_PEEP2 + 1; ++i)
- peep2_insn_data[i].insn = NULL_RTX;
- peep2_insn_data[peep2_current].insn = PEEP2_EOB;
- peep2_current_count = 0;
- }
- else
- {
- /* Back up lifetime information past the end of the
- newly created sequence. */
- if (++i >= MAX_INSNS_PER_PEEP2 + 1)
- i = 0;
- bitmap_copy (live, peep2_insn_data[i].live_before);
+ peep2_do_cleanup_cfg = false;
+ peep2_do_rebuild_jump_labels = false;
- /* Update life information for the new sequence. */
- x = attempt;
- do
- {
- if (INSN_P (x))
- {
- if (--i < 0)
- i = MAX_INSNS_PER_PEEP2;
- if (peep2_current_count < MAX_INSNS_PER_PEEP2
- && peep2_insn_data[i].insn == NULL_RTX)
- peep2_current_count++;
- peep2_insn_data[i].insn = x;
- df_insn_rescan (x);
- df_simulate_one_insn_backwards (bb, x, live);
- bitmap_copy (peep2_insn_data[i].live_before,
- live);
- }
- x = PREV_INSN (x);
- }
- while (x != prev);
+ df_set_flags (DF_LR_RUN_DCE);
+ df_note_add_problem ();
+ df_analyze ();
- peep2_current = i;
- }
+ /* Initialize the regsets we're going to use. */
+ for (i = 0; i < MAX_INSNS_PER_PEEP2 + 1; ++i)
+ peep2_insn_data[i].live_before = BITMAP_ALLOC (&reg_obstack);
+ live = BITMAP_ALLOC (&reg_obstack);
- /* If we generated a jump instruction, it won't have
- JUMP_LABEL set. Recompute after we're done. */
- for (x = attempt; x != before_try; x = PREV_INSN (x))
- if (JUMP_P (x))
- {
- do_rebuild_jump_labels = true;
- break;
- }
+ FOR_EACH_BB_REVERSE (bb)
+ {
+ bool past_end = false;
+ int pos;
+
+ rtl_profile_for_bb (bb);
+
+ /* Start up propagation. */
+ bitmap_copy (live, DF_LR_IN (bb));
+ df_simulate_initialize_forwards (bb, live);
+ peep2_reinit_state (live);
+
+ insn = BB_HEAD (bb);
+ for (;;)
+ {
+ rtx attempt, head;
+ int match_len;
+
+ if (!past_end && !NONDEBUG_INSN_P (insn))
+ {
+ next_insn:
+ insn = NEXT_INSN (insn);
+ if (insn == NEXT_INSN (BB_END (bb)))
+ past_end = true;
+ continue;
}
+ if (!past_end && peep2_fill_buffer (bb, insn, live))
+ goto next_insn;
- if (insn == BB_HEAD (bb))
+ /* If we did not fill an empty buffer, it signals the end of the
+ block. */
+ if (peep2_current_count == 0)
break;
+
+ /* The buffer filled to the current maximum, so try to match. */
+
+ pos = peep2_buf_position (peep2_current + peep2_current_count);
+ peep2_insn_data[pos].insn = PEEP2_EOB;
+ COPY_REG_SET (peep2_insn_data[pos].live_before, live);
+
+ /* Match the peephole. */
+ head = peep2_insn_data[peep2_current].insn;
+ attempt = peephole2_insns (PATTERN (head), head, &match_len);
+ if (attempt != NULL)
+ {
+ rtx last = peep2_attempt (bb, head, match_len, attempt);
+ peep2_update_life (bb, match_len, last, PREV_INSN (attempt));
+ }
+ else
+ {
+ /* If no match, advance the buffer by one insn. */
+ peep2_current = peep2_buf_position (peep2_current + 1);
+ peep2_current_count--;
+ }
}
}
@@ -3343,7 +3407,7 @@ peephole2_optimize (void)
for (i = 0; i < MAX_INSNS_PER_PEEP2 + 1; ++i)
BITMAP_FREE (peep2_insn_data[i].live_before);
BITMAP_FREE (live);
- if (do_rebuild_jump_labels)
+ if (peep2_do_rebuild_jump_labels)
rebuild_jump_labels (get_insns ());
}
#endif /* HAVE_peephole2 */
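
For reference, a standalone sketch of the ring-buffer arithmetic the new
helpers rely on; the value 5 for MAX_INSNS_PER_PEEP2 is an assumption
here, mirroring recog.h:

	#include <stdio.h>

	#define MAX_INSNS_PER_PEEP2 5

	/* Same wrap rule as peep2_buf_position: valid positions are
	   0 .. MAX_INSNS_PER_PEEP2, one slot more than the longest match,
	   with the extra slot holding the PEEP2_EOB marker.  */
	static int
	buf_position (int n)
	{
	  if (n >= MAX_INSNS_PER_PEEP2 + 1)
	    n -= MAX_INSNS_PER_PEEP2 + 1;
	  return n;
	}

	int
	main (void)
	{
	  int i;
	  /* With peep2_current == 4, a three-insn window occupies slots
	     4, 5, 0: the index wraps instead of running off the array.  */
	  for (i = 0; i < 3; i++)
	    printf ("%d\n", buf_position (4 + i));
	  return 0;
	}

A single conditional subtraction is enough because every caller passes a
value strictly less than 2 * (MAX_INSNS_PER_PEEP2 + 1).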
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 158639)
+++ config/i386/i386.md (working copy)
@@ -18083,15 +18083,14 @@ (define_peephole2
;; leal (%edx,%eax,4), %eax
(define_peephole2
- [(parallel [(set (match_operand 0 "register_operand" "")
+ [(match_scratch:SI 5 "r")
+ (parallel [(set (match_operand 0 "register_operand" "")
(ashift (match_operand 1 "register_operand" "")
(match_operand 2 "const_int_operand" "")))
(clobber (reg:CC FLAGS_REG))])
- (set (match_operand 3 "register_operand")
- (match_operand 4 "x86_64_general_operand" ""))
- (parallel [(set (match_operand 5 "register_operand" "")
- (plus (match_operand 6 "register_operand" "")
- (match_operand 7 "register_operand" "")))
+ (parallel [(set (match_operand 3 "register_operand" "")
+ (plus (match_dup 0)
+ (match_operand 4 "x86_64_general_operand" "")))
(clobber (reg:CC FLAGS_REG))])]
"INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 3
/* Validate MODE for lea. */
@@ -18101,30 +18100,21 @@ (define_peephole2
|| GET_MODE (operands[0]) == SImode
|| (TARGET_64BIT && GET_MODE (operands[0]) == DImode))
/* We reorder load and the shift. */
- && !rtx_equal_p (operands[1], operands[3])
- && !reg_overlap_mentioned_p (operands[0], operands[4])
- /* Last PLUS must consist of operand 0 and 3. */
- && !rtx_equal_p (operands[0], operands[3])
- && (rtx_equal_p (operands[3], operands[6])
- || rtx_equal_p (operands[3], operands[7]))
- && (rtx_equal_p (operands[0], operands[6])
- || rtx_equal_p (operands[0], operands[7]))
- /* The intermediate operand 0 must die or be same as output. */
- && (rtx_equal_p (operands[0], operands[5])
- || peep2_reg_dead_p (3, operands[0]))"
- [(set (match_dup 3) (match_dup 4))
+ && !reg_overlap_mentioned_p (operands[0], operands[4])"
+ [(set (match_dup 5) (match_dup 4))
(set (match_dup 0) (match_dup 1))]
{
- enum machine_mode mode = GET_MODE (operands[5]) == DImode ? DImode : SImode;
+ enum machine_mode mode = GET_MODE (operands[1]) == DImode ? DImode : SImode;
int scale = 1 << INTVAL (operands[2]);
rtx index = gen_lowpart (Pmode, operands[1]);
- rtx base = gen_lowpart (Pmode, operands[3]);
- rtx dest = gen_lowpart (mode, operands[5]);
+ rtx base = gen_lowpart (Pmode, operands[5]);
+ rtx dest = gen_lowpart (mode, operands[3]);
operands[1] = gen_rtx_PLUS (Pmode, base,
gen_rtx_MULT (Pmode, index, GEN_INT (scale)));
if (mode != Pmode)
operands[1] = gen_rtx_SUBREG (mode, operands[1], 0);
+ operands[5] = base;
operands[0] = dest;
})
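
Illustration only, with hypothetical registers: the rewritten peephole
now directly matches

	sall	$2, %eax
	addl	4(%esp), %eax

and, using the new match_scratch, turns it into

	movl	4(%esp), %ecx
	leal	(%ecx,%eax,4), %eax

instead of relying on an earlier peephole having already loaded the
memory operand into a register.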
Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c (revision 158771)
+++ config/arm/arm.c (working copy)
@@ -724,6 +724,12 @@ static const char * const arm_condition_
"hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
+/* The register numbers in sequence, for passing to arm_gen_load_multiple. */
+int arm_regs_in_sequence[] =
+{
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+};
+
#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)
@@ -9074,21 +9080,121 @@ adjacent_mem_locations (rtx a, rtx b)
return 0;
}
-int
-load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
- HOST_WIDE_INT *load_offset)
+/* Return true iff it would be profitable to turn a sequence of NOPS loads
+ or stores (depending on IS_STORE) into a load-multiple or store-multiple
+ instruction. NEED_ADD is true if the base address register needs to be
+ modified with an add instruction before we can use it. */
+
+static bool
+multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
+ int nops, bool need_add)
{
- int unsorted_regs[4];
- HOST_WIDE_INT unsorted_offsets[4];
- int order[4];
+ /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
+ if the offset isn't small enough. The reason 2 ldrs are faster
+ is because these ARMs are able to do more than one cache access
+ in a single cycle. The ARM9 and StrongARM have Harvard caches,
+ whilst the ARM8 has a double bandwidth cache. This means that
+ these cores can do both an instruction fetch and a data fetch in
+ a single cycle, so the trick of calculating the address into a
+ scratch register (one of the result regs) and then doing a load
+ multiple actually becomes slower (and no smaller in code size).
+ That is the transformation
+
+ ldr rd1, [rbase + offset]
+ ldr rd2, [rbase + offset + 4]
+
+ to
+
+ add rd1, rbase, offset
+ ldmia rd1, {rd1, rd2}
+
+ produces worse code -- '3 cycles + any stalls on rd2' instead of
+ '2 cycles + any stalls on rd2'. On ARMs with only one cache
+ access per cycle, the first sequence could never complete in less
+ than 6 cycles, whereas the ldm sequence would only take 5 and
+ would make better use of sequential accesses if not hitting the
+ cache.
+
+ We cheat here and test 'arm_ld_sched' which we currently know to
+ only be true for the ARM8, ARM9 and StrongARM. If this ever
+ changes, then the test below needs to be reworked. */
+ if (nops == 2 && arm_ld_sched && need_add)
+ return false;
+
+ return true;
+}
+
+/* Subroutine of load_multiple_sequence and store_multiple_sequence.
+ Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
+ an array ORDER which describes the sequence to use when accessing the
+ offsets that produces an ascending order. In this sequence, each
+ offset must be larger by exactly 4 than the previous one. ORDER[0]
+ must have been filled in with the lowest offset by the caller.
+ If UNSORTED_REGS is nonnull, it is an array of register numbers that
+ we use to verify that ORDER produces an ascending order of registers.
+ Return true if it was possible to construct such an order, false if
+ not. */
+
+static bool
+compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
+ int *unsorted_regs)
+{
+ int i;
+ for (i = 1; i < nops; i++)
+ {
+ int j;
+
+ order[i] = order[i - 1];
+ for (j = 0; j < nops; j++)
+ if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
+ {
+ /* We must find exactly one offset that is higher than the
+ previous one by 4. */
+ if (order[i] != order[i - 1])
+ return false;
+ order[i] = j;
+ }
+ if (order[i] == order[i - 1])
+ return false;
+ /* The register numbers must be ascending. */
+ if (unsorted_regs != NULL
+ && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
+ return false;
+ }
+ return true;
+}
+
+/* Used to determine in a peephole whether a sequence of load instructions can
+ be changed into a load-multiple instruction.
+ NOPS is the number of separate load instructions we are examining.
+ The first NOPS entries in OPERANDS are the destination registers, the next
+ NOPS entries are memory operands. If this function is successful, *BASE is
+ set to the common base register of the memory accesses; *LOAD_OFFSET is set
+ to the first memory location's offset from that base register. REGS is an
+ array filled in with the destination register numbers.
+ SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
+ numbers to an ascending order of loads.
+ If CHECK_REGS is true, the sequence of registers in REGS matches the loads
+ from ascending memory locations, and the function verifies that the register
+ numbers are themselves ascending. If CHECK_REGS is false, the register
+ numbers are stored in the order they are found in the operands. */
+static int
+load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
+ int *base, HOST_WIDE_INT *load_offset, bool check_regs)
+{
+ int unsorted_regs[MAX_LDM_STM_OPS];
+ HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
+ int order[MAX_LDM_STM_OPS];
+ rtx base_reg_rtx;
int base_reg = -1;
int i;
+ int ldm_case;
- /* Can only handle 2, 3, or 4 insns at present,
+ /* Can only handle between 2 and MAX_LDM_STM_OPS insns at present,
though could be easily extended if required. */
- gcc_assert (nops >= 2 && nops <= 4);
+ gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
- memset (order, 0, 4 * sizeof (int));
+ memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
/* Loop over the operands and check that the memory references are
suitable (i.e. immediate offsets from the same base register). At
@@ -9126,32 +9232,32 @@ load_multiple_sequence (rtx *operands, i
if (i == 0)
{
base_reg = REGNO (reg);
- unsorted_regs[0] = (GET_CODE (operands[i]) == REG
- ? REGNO (operands[i])
- : REGNO (SUBREG_REG (operands[i])));
- order[0] = 0;
+ base_reg_rtx = reg;
+ if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
+ return 0;
}
else
{
if (base_reg != (int) REGNO (reg))
/* Not addressed from the same base register. */
return 0;
-
- unsorted_regs[i] = (GET_CODE (operands[i]) == REG
- ? REGNO (operands[i])
- : REGNO (SUBREG_REG (operands[i])));
- if (unsorted_regs[i] < unsorted_regs[order[0]])
- order[0] = i;
}
+ unsorted_regs[i] = (GET_CODE (operands[i]) == REG
+ ? REGNO (operands[i])
+ : REGNO (SUBREG_REG (operands[i])));
/* If it isn't an integer register, or if it overwrites the
base register but isn't the last insn in the list, then
we can't do this. */
- if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
+ if (unsorted_regs[i] < 0
+ || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
+ || unsorted_regs[i] > 14
|| (i != nops - 1 && unsorted_regs[i] == base_reg))
return 0;
unsorted_offsets[i] = INTVAL (offset);
+ if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
+ order[0] = i;
}
else
/* Not a suitable memory address. */
@@ -9160,164 +9266,87 @@ load_multiple_sequence (rtx *operands, i
/* All the useful information has now been extracted from the
operands into unsorted_regs and unsorted_offsets; additionally,
- order[0] has been set to the lowest numbered register in the
- list. Sort the registers into order, and check that the memory
- offsets are ascending and adjacent. */
-
- for (i = 1; i < nops; i++)
- {
- int j;
+ order[0] has been set to the lowest offset in the list. Sort
+ the offsets into order, verifying that they are adjacent, and
+ check that the register numbers are ascending. */
+ if (!compute_offset_order (nops, unsorted_offsets, order,
+ check_regs ? unsorted_regs : NULL))
+ return 0;
- order[i] = order[i - 1];
- for (j = 0; j < nops; j++)
- if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
- && (order[i] == order[i - 1]
- || unsorted_regs[j] < unsorted_regs[order[i]]))
- order[i] = j;
- /* Have we found a suitable register? if not, one must be used more
- than once. */
- if (order[i] == order[i - 1])
- return 0;
-
- /* Is the memory address adjacent and ascending? */
- if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
- return 0;
- }
+ if (saved_order)
+ memcpy (saved_order, order, sizeof order);
if (base)
{
*base = base_reg;
for (i = 0; i < nops; i++)
- regs[i] = unsorted_regs[order[i]];
+ regs[i] = unsorted_regs[check_regs ? order[i] : i];
*load_offset = unsorted_offsets[order[0]];
}
- if (unsorted_offsets[order[0]] == 0)
- return 1; /* ldmia */
-
- if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
- return 2; /* ldmib */
-
- if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
- return 3; /* ldmda */
-
- if (unsorted_offsets[order[nops - 1]] == -4)
- return 4; /* ldmdb */
-
- /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
- if the offset isn't small enough. The reason 2 ldrs are faster
- is because these ARMs are able to do more than one cache access
- in a single cycle. The ARM9 and StrongARM have Harvard caches,
- whilst the ARM8 has a double bandwidth cache. This means that
- these cores can do both an instruction fetch and a data fetch in
- a single cycle, so the trick of calculating the address into a
- scratch register (one of the result regs) and then doing a load
- multiple actually becomes slower (and no smaller in code size).
- That is the transformation
-
- ldr rd1, [rbase + offset]
- ldr rd2, [rbase + offset + 4]
-
- to
-
- add rd1, rbase, offset
- ldmia rd1, {rd1, rd2}
+ if (TARGET_THUMB1
+ && !peep2_reg_dead_p (nops, base_reg_rtx))
+ return 0;
- produces worse code -- '3 cycles + any stalls on rd2' instead of
- '2 cycles + any stalls on rd2'. On ARMs with only one cache
- access per cycle, the first sequence could never complete in less
- than 6 cycles, whereas the ldm sequence would only take 5 and
- would make better use of sequential accesses if not hitting the
- cache.
+ if (unsorted_offsets[order[0]] == 0)
+ ldm_case = 1; /* ldmia */
+ else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
+ ldm_case = 2; /* ldmib */
+ else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
+ ldm_case = 3; /* ldmda */
+ else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
+ ldm_case = 4; /* ldmdb */
+ else if (const_ok_for_arm (unsorted_offsets[order[0]])
+ || const_ok_for_arm (-unsorted_offsets[order[0]]))
+ ldm_case = 5;
+ else
+ return 0;
- We cheat here and test 'arm_ld_sched' which we currently know to
- only be true for the ARM8, ARM9 and StrongARM. If this ever
- changes, then the test below needs to be reworked. */
- if (nops == 2 && arm_ld_sched)
+ if (!multiple_operation_profitable_p (false, nops, ldm_case == 5))
return 0;
- /* Can't do it without setting up the offset, only do this if it takes
- no more than one insn. */
- return (const_ok_for_arm (unsorted_offsets[order[0]])
- || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
+ return ldm_case;
}
-const char *
-emit_ldm_seq (rtx *operands, int nops)
-{
- int regs[4];
- int base_reg;
- HOST_WIDE_INT offset;
- char buf[100];
- int i;
-
- switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
- {
- case 1:
- strcpy (buf, "ldm%(ia%)\t");
- break;
-
- case 2:
- strcpy (buf, "ldm%(ib%)\t");
- break;
-
- case 3:
- strcpy (buf, "ldm%(da%)\t");
- break;
-
- case 4:
- strcpy (buf, "ldm%(db%)\t");
- break;
-
- case 5:
- if (offset >= 0)
- sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
- reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
- (long) offset);
- else
- sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
- reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
- (long) -offset);
- output_asm_insn (buf, operands);
- base_reg = regs[0];
- strcpy (buf, "ldm%(ia%)\t");
- break;
-
- default:
- gcc_unreachable ();
- }
-
- sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
- reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
-
- for (i = 1; i < nops; i++)
- sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
- reg_names[regs[i]]);
-
- strcat (buf, "}\t%@ phole ldm");
-
- output_asm_insn (buf, operands);
- return "";
-}
+/* Used to determine in a peephole whether a sequence of store instructions can
+ be changed into a store-multiple instruction.
+ NOPS is the number of separate store instructions we are examining.
+ NOPS_TOTAL is the total number of instructions recognized by the peephole
+ pattern.
+ The first NOPS entries in OPERANDS are the source registers, the next
+ NOPS entries are memory operands. If this function is successful, *BASE is
+ set to the common base register of the memory accesses; *LOAD_OFFSET is set
+ to the first memory location's offset from that base register. REGS is an
+ array filled in with the source register numbers, REG_RTXS (if nonnull) is
+ likewise filled with the corresponding rtx's.
+ SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
+ numbers to an ascending order of stores.
+ If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
+ from ascending memory locations, and the function verifies that the register
+ numbers are themselves ascending. If CHECK_REGS is false, the register
+ numbers are stored in the order they are found in the operands. */
-int
-store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
- HOST_WIDE_INT * load_offset)
+static int
+store_multiple_sequence (rtx *operands, int nops, int nops_total,
+ int *regs, rtx *reg_rtxs, int *saved_order, int *base,
+ HOST_WIDE_INT *load_offset, bool check_regs)
{
- int unsorted_regs[4];
- HOST_WIDE_INT unsorted_offsets[4];
- int order[4];
+ int unsorted_regs[MAX_LDM_STM_OPS];
+ rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
+ HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
+ int order[MAX_LDM_STM_OPS];
int base_reg = -1;
- int i;
+ rtx base_reg_rtx;
+ int i, stm_case;
- /* Can only handle 2, 3, or 4 insns at present, though could be easily
- extended if required. */
- gcc_assert (nops >= 2 && nops <= 4);
+ /* Can only handle between 2 and MAX_LDM_STM_OPS insns at present,
+ though could be easily extended if required. */
+ gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
- memset (order, 0, 4 * sizeof (int));
+ memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
/* Loop over the operands and check that the memory references are
suitable (i.e. immediate offsets from the same base register). At
@@ -9355,29 +9384,31 @@ store_multiple_sequence (rtx *operands,
if (i == 0)
{
base_reg = REGNO (reg);
- unsorted_regs[0] = (GET_CODE (operands[i]) == REG
- ? REGNO (operands[i])
- : REGNO (SUBREG_REG (operands[i])));
- order[0] = 0;
+ base_reg_rtx = reg;
+ if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
+ return 0;
}
else
{
if (base_reg != (int) REGNO (reg))
/* Not addressed from the same base register. */
return 0;
-
- unsorted_regs[i] = (GET_CODE (operands[i]) == REG
- ? REGNO (operands[i])
- : REGNO (SUBREG_REG (operands[i])));
- if (unsorted_regs[i] < unsorted_regs[order[0]])
- order[0] = i;
}
+ unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
+ ? operands[i] : SUBREG_REG (operands[i]));
+ unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
/* If it isn't an integer register, then we can't do this. */
- if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
+ if (unsorted_regs[i] < 0
+ || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
+ || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
+ || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
+ || unsorted_regs[i] > 14)
return 0;
unsorted_offsets[i] = INTVAL (offset);
+ if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
+ order[0] = i;
}
else
/* Not a suitable memory address. */
@@ -9386,111 +9417,78 @@ store_multiple_sequence (rtx *operands,
/* All the useful information has now been extracted from the
operands into unsorted_regs and unsorted_offsets; additionally,
- order[0] has been set to the lowest numbered register in the
- list. Sort the registers into order, and check that the memory
- offsets are ascending and adjacent. */
-
- for (i = 1; i < nops; i++)
- {
- int j;
-
- order[i] = order[i - 1];
- for (j = 0; j < nops; j++)
- if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
- && (order[i] == order[i - 1]
- || unsorted_regs[j] < unsorted_regs[order[i]]))
- order[i] = j;
+ order[0] has been set to the lowest offset in the list. Sort
+ the offsets into order, verifying that they are adjacent, and
+ check that the register numbers are ascending. */
+ if (!compute_offset_order (nops, unsorted_offsets, order,
+ check_regs ? unsorted_regs : NULL))
+ return 0;
- /* Have we found a suitable register? if not, one must be used more
- than once. */
- if (order[i] == order[i - 1])
- return 0;
- /* Is the memory address adjacent and ascending? */
- if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
- return 0;
- }
+ if (saved_order)
+ memcpy (saved_order, order, sizeof order);
if (base)
{
*base = base_reg;
for (i = 0; i < nops; i++)
- regs[i] = unsorted_regs[order[i]];
+ {
+ regs[i] = unsorted_regs[check_regs ? order[i] : i];
+ if (reg_rtxs)
+ reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
+ }
*load_offset = unsorted_offsets[order[0]];
}
- if (unsorted_offsets[order[0]] == 0)
- return 1; /* stmia */
+ if (TARGET_THUMB1
+ && !peep2_reg_dead_p (nops_total, base_reg_rtx))
+ return 0;
- if (unsorted_offsets[order[0]] == 4)
- return 2; /* stmib */
+ if (unsorted_offsets[order[0]] == 0)
+ stm_case = 1; /* stmia */
+ else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
+ stm_case = 2; /* stmib */
+ else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
+ stm_case = 3; /* stmda */
+ else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
+ stm_case = 4; /* stmdb */
+ else if (peep2_reg_dead_p (nops_total, base_reg_rtx)
+ && (const_ok_for_arm (unsorted_offsets[order[0]])
+ || const_ok_for_arm (-unsorted_offsets[order[0]])))
+ stm_case = 5;
+ else
+ return 0;
- if (unsorted_offsets[order[nops - 1]] == 0)
- return 3; /* stmda */
+ if (!multiple_operation_profitable_p (false, nops, stm_case == 5))
+ return 0;
- if (unsorted_offsets[order[nops - 1]] == -4)
- return 4; /* stmdb */
+ return stm_case;
- return 0;
}
-
-const char *
-emit_stm_seq (rtx *operands, int nops)
-{
- int regs[4];
- int base_reg;
- HOST_WIDE_INT offset;
- char buf[100];
- int i;
-
- switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
- {
- case 1:
- strcpy (buf, "stm%(ia%)\t");
- break;
-
- case 2:
- strcpy (buf, "stm%(ib%)\t");
- break;
-
- case 3:
- strcpy (buf, "stm%(da%)\t");
- break;
-
- case 4:
- strcpy (buf, "stm%(db%)\t");
- break;
-
- default:
- gcc_unreachable ();
- }
-
- sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
- reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
-
- for (i = 1; i < nops; i++)
- sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
- reg_names[regs[i]]);
-
- strcat (buf, "}\t%@ phole stm");
-
- output_asm_insn (buf, operands);
- return "";
-}
/* Routines for use in generating RTL. */
-rtx
-arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
- int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
+/* Generate a load-multiple instruction. COUNT is the number of loads in
+ the instruction; REGS and MEMS are arrays containing the operands.
+ BASEREG is the base register to be used in addressing the memory operands.
+ WBACK_OFFSET is nonzero if the instruction should update the base
+ register. */
+
+static rtx
+arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
+ HOST_WIDE_INT wback_offset)
{
- HOST_WIDE_INT offset = *offsetp;
int i = 0, j;
rtx result;
- int sign = up ? 1 : -1;
- rtx mem, addr;
/* XScale has load-store double instructions, but they have stricter
alignment requirements than load-store multiple, so we cannot
@@ -9527,18 +9525,10 @@ arm_gen_load_multiple (int base_regno, i
start_sequence ();
for (i = 0; i < count; i++)
- {
- addr = plus_constant (from, i * 4 * sign);
- mem = adjust_automodify_address (basemem, SImode, addr, offset);
- emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
- offset += 4 * sign;
- }
+ emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
- if (write_back)
- {
- emit_move_insn (from, plus_constant (from, count * 4 * sign));
- *offsetp = offset;
- }
+ if (wback_offset != 0)
+ emit_move_insn (basereg, plus_constant (basereg, wback_offset));
seq = get_insns ();
end_sequence ();
@@ -9547,41 +9537,40 @@ arm_gen_load_multiple (int base_regno, i
}
result = gen_rtx_PARALLEL (VOIDmode,
- rtvec_alloc (count + (write_back ? 1 : 0)));
- if (write_back)
+ rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
+ if (wback_offset != 0)
{
XVECEXP (result, 0, 0)
- = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
+ = gen_rtx_SET (VOIDmode, basereg,
+ plus_constant (basereg, wback_offset));
i = 1;
count++;
}
for (j = 0; i < count; i++, j++)
- {
- addr = plus_constant (from, j * 4 * sign);
- mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
- XVECEXP (result, 0, i)
- = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
- offset += 4 * sign;
- }
-
- if (write_back)
- *offsetp = offset;
+ XVECEXP (result, 0, i)
+ = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
return result;
}
-rtx
-arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
- int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
+/* Generate a store-multiple instruction. COUNT is the number of stores in
+ the instruction; REGS and MEMS are arrays containing the operands.
+ BASEREG is the base register to be used in addressing the memory operands.
+ WBACK_OFFSET is nonzero if the instruction should update the base
+ register. */
+
+static rtx
+arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
+ HOST_WIDE_INT wback_offset)
{
- HOST_WIDE_INT offset = *offsetp;
int i = 0, j;
rtx result;
- int sign = up ? 1 : -1;
- rtx mem, addr;
- /* See arm_gen_load_multiple for discussion of
+ if (GET_CODE (basereg) == PLUS)
+ basereg = XEXP (basereg, 0);
+
+ /* See arm_gen_load_multiple_1 for discussion of
the pros/cons of ldm/stm usage for XScale. */
if (arm_tune_xscale && count <= 2 && ! optimize_size)
{
@@ -9590,18 +9579,10 @@ arm_gen_store_multiple (int base_regno,
start_sequence ();
for (i = 0; i < count; i++)
- {
- addr = plus_constant (to, i * 4 * sign);
- mem = adjust_automodify_address (basemem, SImode, addr, offset);
- emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
- offset += 4 * sign;
- }
+ emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
- if (write_back)
- {
- emit_move_insn (to, plus_constant (to, count * 4 * sign));
- *offsetp = offset;
- }
+ if (wback_offset != 0)
+ emit_move_insn (basereg, plus_constant (basereg, wback_offset));
seq = get_insns ();
end_sequence ();
@@ -9610,29 +9591,319 @@ arm_gen_store_multiple (int base_regno,
}
result = gen_rtx_PARALLEL (VOIDmode,
- rtvec_alloc (count + (write_back ? 1 : 0)));
- if (write_back)
+ rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
+ if (wback_offset != 0)
{
XVECEXP (result, 0, 0)
- = gen_rtx_SET (VOIDmode, to,
- plus_constant (to, count * 4 * sign));
+ = gen_rtx_SET (VOIDmode, basereg,
+ plus_constant (basereg, wback_offset));
i = 1;
count++;
}
for (j = 0; i < count; i++, j++)
+ XVECEXP (result, 0, i)
+ = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
+
+ return result;
+}
+
+/* Generate either a load-multiple or a store-multiple instruction. This
+ function can be used in situations where we can start with a single MEM
+ rtx and adjust its address upwards.
+ COUNT is the number of operations in the instruction, not counting a
+ possible update of the base register. REGS is an array containing the
+ register operands.
+ BASEREG is the base register to be used in addressing the memory operands,
+ which are constructed from BASEMEM.
+ WRITE_BACK specifies whether the generated instruction should include an
+ update of the base register.
+ OFFSETP is used to pass an offset to and from this function; this offset
+ is not used when constructing the address (instead BASEMEM should have an
+ appropriate offset in its address); it is used only for setting
+ MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
+
+static rtx
+arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
+ bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
+{
+ rtx mems[MAX_LDM_STM_OPS];
+ HOST_WIDE_INT offset = *offsetp;
+ int i;
+
+ gcc_assert (count <= MAX_LDM_STM_OPS);
+
+ if (GET_CODE (basereg) == PLUS)
+ basereg = XEXP (basereg, 0);
+
+ for (i = 0; i < count; i++)
{
- addr = plus_constant (to, j * 4 * sign);
- mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
- XVECEXP (result, 0, i)
- = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
- offset += 4 * sign;
+ rtx addr = plus_constant (basereg, i * 4);
+ mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
+ offset += 4;
}
if (write_back)
*offsetp = offset;
- return result;
+ if (is_load)
+ return arm_gen_load_multiple_1 (count, regs, mems, basereg,
+ write_back ? 4 * count : 0);
+ else
+ return arm_gen_store_multiple_1 (count, regs, mems, basereg,
+ write_back ? 4 * count : 0);
+}
+
+rtx
+arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
+ rtx basemem, HOST_WIDE_INT *offsetp)
+{
+ return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
+ offsetp);
+}
+
+rtx
+arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
+ rtx basemem, HOST_WIDE_INT *offsetp)
+{
+ return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
+ offsetp);
+}
+
+/* Called from a peephole2 expander to turn a sequence of loads into an
+ LDM instruction. OPERANDS are the operands found by the peephole matcher;
+ NOPS indicates how many separate loads we are trying to combine. SORT_REGS
+ is true if we can reorder the registers because they are used commutatively
+ subsequently.
+ Returns true iff we could generate a new instruction. */
+
+bool
+gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
+{
+ int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
+ rtx mems[MAX_LDM_STM_OPS];
+ int i, j, base_reg;
+ rtx base_reg_rtx;
+ HOST_WIDE_INT offset;
+ int write_back = FALSE;
+ int ldm_case;
+ rtx addr;
+
+ ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
+ &base_reg, &offset, !sort_regs);
+
+ if (ldm_case == 0)
+ return false;
+
+ if (sort_regs)
+ for (i = 0; i < nops - 1; i++)
+ for (j = i + 1; j < nops; j++)
+ if (regs[i] > regs[j])
+ {
+ int t = regs[i];
+ regs[i] = regs[j];
+ regs[j] = t;
+ }
+ base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
+
+ if (TARGET_THUMB1)
+ {
+ gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
+ gcc_assert (ldm_case == 1 || ldm_case == 5);
+ write_back = TRUE;
+ }
+
+ if (ldm_case == 5)
+ {
+ rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
+ emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
+ offset = 0;
+ if (!TARGET_THUMB1)
+ {
+ base_reg = regs[0];
+ base_reg_rtx = newbase;
+ }
+ }
+
+ for (i = 0; i < nops; i++)
+ {
+ addr = plus_constant (base_reg_rtx, offset + i * 4);
+ mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
+ SImode, addr, 0);
+ }
+ emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
+ write_back ? offset + i * 4 : 0));
+ return true;
+}
+
+/* Called from a peephole2 expander to turn a sequence of stores into an
+ STM instruction. OPERANDS are the operands found by the peephole matcher;
+ NOPS indicates how many separate stores we are trying to combine.
+ Returns true iff we could generate a new instruction. */
+
+bool
+gen_stm_seq (rtx *operands, int nops)
+{
+ int i;
+ int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
+ rtx mems[MAX_LDM_STM_OPS];
+ int base_reg;
+ rtx base_reg_rtx;
+ HOST_WIDE_INT offset;
+ int write_back = FALSE;
+ int stm_case;
+ rtx addr;
+ bool base_reg_dies;
+
+ stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
+ mem_order, &base_reg, &offset, true);
+
+ if (stm_case == 0)
+ return false;
+
+ base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
+
+ base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
+ if (TARGET_THUMB1)
+ {
+ gcc_assert (base_reg_dies);
+ write_back = TRUE;
+ }
+
+ if (stm_case == 5)
+ {
+ gcc_assert (base_reg_dies);
+ emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
+ offset = 0;
+ }
+
+ addr = plus_constant (base_reg_rtx, offset);
+
+ for (i = 0; i < nops; i++)
+ {
+ addr = plus_constant (base_reg_rtx, offset + i * 4);
+ mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
+ SImode, addr, 0);
+ }
+ emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
+ write_back ? offset + i * 4 : 0));
+ return true;
+}
+
+/* Called from a peephole2 expander to turn a sequence of stores that are
+ preceded by constant loads into an STM instruction. OPERANDS are the
+ operands found by the peephole matcher; NOPS indicates how many
+ separate stores we are trying to combine; there are 2 * NOPS
+ instructions in the peephole.
+ Returns true iff we could generate a new instruction. */
+
+bool
+gen_const_stm_seq (rtx *operands, int nops)
+{
+ int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
+ int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
+ rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
+ rtx mems[MAX_LDM_STM_OPS];
+ int base_reg;
+ rtx base_reg_rtx;
+ HOST_WIDE_INT offset;
+ int write_back = FALSE;
+ int stm_case;
+ rtx addr;
+ bool base_reg_dies;
+ int i, j;
+ HARD_REG_SET allocated;
+
+ stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
+ mem_order, &base_reg, &offset, false);
+
+ if (stm_case == 0)
+ return false;
+
+ memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
+
+ /* If the same register is used more than once, try to find a free
+ register. */
+ CLEAR_HARD_REG_SET (allocated);
+ for (i = 0; i < nops; i++)
+ {
+ for (j = i + 1; j < nops; j++)
+ if (regs[i] == regs[j])
+ {
+ rtx t = peep2_find_free_register (0, nops * 2,
+ TARGET_THUMB1 ? "l" : "r",
+ SImode, &allocated);
+ if (t == NULL_RTX)
+ return false;
+ reg_rtxs[i] = t;
+ regs[i] = REGNO (t);
+ }
+ }
+
+ /* Compute an ordering that maps the register numbers to an ascending
+ sequence. */
+ reg_order[0] = 0;
+ for (i = 0; i < nops; i++)
+ if (regs[i] < regs[reg_order[0]])
+ reg_order[0] = i;
+
+ for (i = 1; i < nops; i++)
+ {
+ int this_order = reg_order[i - 1];
+ for (j = 0; j < nops; j++)
+ if (regs[j] > regs[reg_order[i - 1]]
+ && (this_order == reg_order[i - 1]
+ || regs[j] < regs[this_order]))
+ this_order = j;
+ reg_order[i] = this_order;
+ }
+
+ /* Ensure that registers that must be live after the instruction end
+ up with the correct value. */
+ for (i = 0; i < nops; i++)
+ {
+ int this_order = reg_order[i];
+ if ((this_order != mem_order[i]
+ || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
+ && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
+ return false;
+ }
+
+ /* Load the constants. */
+ for (i = 0; i < nops; i++)
+ {
+ rtx op = operands[2 * nops + mem_order[i]];
+ sorted_regs[i] = regs[reg_order[i]];
+ emit_move_insn (reg_rtxs[reg_order[i]], op);
+ }
+
+ base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
+
+ base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
+ if (TARGET_THUMB1)
+ {
+ gcc_assert (base_reg_dies);
+ write_back = TRUE;
+ }
+
+ if (stm_case == 5)
+ {
+ gcc_assert (base_reg_dies);
+ emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
+ offset = 0;
+ }
+
+ addr = plus_constant (base_reg_rtx, offset);
+
+ for (i = 0; i < nops; i++)
+ {
+ addr = plus_constant (base_reg_rtx, offset + i * 4);
+ mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
+ SImode, addr, 0);
+ }
+ emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
+ write_back ? offset + i * 4 : 0));
+ return true;
}
int
@@ -9668,20 +9939,21 @@ arm_gen_movmemqi (rtx *operands)
for (i = 0; in_words_to_go >= 2; i+=4)
{
if (in_words_to_go > 4)
- emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
- srcbase, &srcoffset));
+ emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
+ TRUE, srcbase, &srcoffset));
else
- emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
- FALSE, srcbase, &srcoffset));
+ emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
+ src, FALSE, srcbase,
+ &srcoffset));
if (out_words_to_go)
{
if (out_words_to_go > 4)
- emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
- dstbase, &dstoffset));
+ emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
+ TRUE, dstbase, &dstoffset));
else if (out_words_to_go != 1)
- emit_insn (arm_gen_store_multiple (0, out_words_to_go,
- dst, TRUE,
+ emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
+ out_words_to_go, dst,
(last_bytes == 0
? FALSE : TRUE),
dstbase, &dstoffset));
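
A worked example of the new ordering helper, for illustration: given
four loads whose memory operands have offsets 8, 0, 4 and 12, order[0]
is initialized to 1 (the index of the lowest offset) and
compute_offset_order fills in the rest:

	insn index i:         0   1   2   3
	unsorted_offsets[i]:  8   0   4  12
	resulting order[]:    1   2   0   3

i.e. the insns are visited in ascending offset order 0, 4, 8, 12.  If
any offset other than previous-plus-4 appeared, or if CHECK_REGS is true
and the register numbers along this order were not ascending, the
sequence would be rejected.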
Index: config/arm/arm.h
===================================================================
--- config/arm/arm.h (revision 158639)
+++ config/arm/arm.h (working copy)
@@ -1087,6 +1087,13 @@ extern int arm_structure_size_boundary;
((MODE) == TImode || (MODE) == EImode || (MODE) == OImode \
|| (MODE) == CImode || (MODE) == XImode)
+/* The maximum number of parallel loads or stores we support in an ldm/stm
+ instruction. */
+#define MAX_LDM_STM_OPS 4
+
+/* The register numbers in sequence, for passing to arm_gen_load_multiple. */
+extern int arm_regs_in_sequence[];
+
/* The order in which register should be allocated. It is good to use ip
since no saving is required (though calls clobber it) and it never contains
function parameters. It is quite good to use lr since other calls may
Index: config/arm/arm-protos.h
===================================================================
--- config/arm/arm-protos.h (revision 158639)
+++ config/arm/arm-protos.h (working copy)
@@ -98,14 +98,11 @@ extern int symbol_mentioned_p (rtx);
extern int label_mentioned_p (rtx);
extern RTX_CODE minmax_code (rtx);
extern int adjacent_mem_locations (rtx, rtx);
-extern int load_multiple_sequence (rtx *, int, int *, int *, HOST_WIDE_INT *);
-extern const char *emit_ldm_seq (rtx *, int);
-extern int store_multiple_sequence (rtx *, int, int *, int *, HOST_WIDE_INT *);
-extern const char * emit_stm_seq (rtx *, int);
-extern rtx arm_gen_load_multiple (int, int, rtx, int, int,
- rtx, HOST_WIDE_INT *);
-extern rtx arm_gen_store_multiple (int, int, rtx, int, int,
- rtx, HOST_WIDE_INT *);
+extern bool gen_ldm_seq (rtx *, int, bool);
+extern bool gen_stm_seq (rtx *, int);
+extern bool gen_const_stm_seq (rtx *, int);
+extern rtx arm_gen_load_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *);
+extern rtx arm_gen_store_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *);
extern int arm_gen_movmemqi (rtx *);
extern enum machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx);
extern enum machine_mode arm_select_dominance_cc_mode (rtx, rtx,
Index: config/arm/ldmstm.md
===================================================================
--- config/arm/ldmstm.md (revision 0)
+++ config/arm/ldmstm.md (revision 0)
@@ -0,0 +1,1155 @@
+/* ARM ldm/stm instruction patterns. This file was automatically generated
+ using arm-ldmstm.ml. Please do not edit manually.
+
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by CodeSourcery.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+(define_insn "*ldm4_ia"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_operand:SI 5 "s_register_operand" "rk")))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 4))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 8))))
+ (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 12))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
+ "ldm%(ia%)\t%5, {%1, %2, %3, %4}"
+ [(set_attr "type" "load4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*thumb_ldm4_ia"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_operand:SI 5 "s_register_operand" "l")))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 4))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 8))))
+ (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 12))))])]
+ "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4"
+ "ldm%(ia%)\t%5, {%1, %2, %3, %4}"
+ [(set_attr "type" "load4")])
+
+(define_insn "*ldm4_ia_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 5 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 5) (const_int 16)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 5)))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 4))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 8))))
+ (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 12))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
+ "ldm%(ia%)\t%5!, {%1, %2, %3, %4}"
+ [(set_attr "type" "load4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*thumb_ldm4_ia_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 5 "s_register_operand" "+&l")
+ (plus:SI (match_dup 5) (const_int 16)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 5)))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 4))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 8))))
+ (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 12))))])]
+ "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 5"
+ "ldm%(ia%)\t%5!, {%1, %2, %3, %4}"
+ [(set_attr "type" "load4")])
+
+(define_insn "*stm4_ia"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (match_operand:SI 5 "s_register_operand" "rk"))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 8)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 12)))
+ (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
+ "stm%(ia%)\t%5, {%1, %2, %3, %4}"
+ [(set_attr "type" "store4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm4_ia_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 5 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 5) (const_int 16)))
+ (set (mem:SI (match_dup 5))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 8)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 12)))
+ (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
+ "stm%(ia%)\t%5!, {%1, %2, %3, %4}"
+ [(set_attr "type" "store4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*thumb_stm4_ia_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 5 "s_register_operand" "+&l")
+ (plus:SI (match_dup 5) (const_int 16)))
+ (set (mem:SI (match_dup 5))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 8)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 12)))
+ (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 5"
+ "stm%(ia%)\t%5!, {%1, %2, %3, %4}"
+ [(set_attr "type" "store4")])
+
+(define_insn "*ldm4_ib"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk")
+ (const_int 4))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 8))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 12))))
+ (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 16))))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
+ "ldm%(ib%)\t%5, {%1, %2, %3, %4}"
+ [(set_attr "type" "load4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm4_ib_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 5 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 5) (const_int 16)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 4))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 8))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 12))))
+ (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int 16))))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 5"
+ "ldm%(ib%)\t%5!, {%1, %2, %3, %4}"
+ [(set_attr "type" "load4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm4_ib"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") (const_int 4)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 8)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 12)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 16)))
+ (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
+ "stm%(ib%)\t%5, {%1, %2, %3, %4}"
+ [(set_attr "type" "store4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm4_ib_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 5 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 5) (const_int 16)))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 4)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 8)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 12)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int 16)))
+ (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 5"
+ "stm%(ib%)\t%5!, {%1, %2, %3, %4}"
+ [(set_attr "type" "store4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm4_da"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk")
+ (const_int -12))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -8))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -4))))
+ (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 5)))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
+ "ldm%(da%)\t%5, {%1, %2, %3, %4}"
+ [(set_attr "type" "load4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm4_da_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 5 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 5) (const_int -16)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -12))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -8))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -4))))
+ (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 5)))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 5"
+ "ldm%(da%)\t%5!, {%1, %2, %3, %4}"
+ [(set_attr "type" "load4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm4_da"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") (const_int -12)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -8)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -4)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (set (mem:SI (match_dup 5))
+ (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
+ "stm%(da%)\t%5, {%1, %2, %3, %4}"
+ [(set_attr "type" "store4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm4_da_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 5 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 5) (const_int -16)))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -12)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -8)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -4)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (set (mem:SI (match_dup 5))
+ (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 5"
+ "stm%(da%)\t%5!, {%1, %2, %3, %4}"
+ [(set_attr "type" "store4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm4_db"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk")
+ (const_int -16))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -12))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -8))))
+ (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -4))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
+ "ldm%(db%)\t%5, {%1, %2, %3, %4}"
+ [(set_attr "type" "load4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm4_db_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 5 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 5) (const_int -16)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -16))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -12))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -8))))
+ (set (match_operand:SI 4 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 5)
+ (const_int -4))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
+ "ldm%(db%)\t%5!, {%1, %2, %3, %4}"
+ [(set_attr "type" "load4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm4_db"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") (const_int -16)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -12)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -8)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -4)))
+ (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
+ "stm%(db%)\t%5, {%1, %2, %3, %4}"
+ [(set_attr "type" "store4")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm4_db_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 5 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 5) (const_int -16)))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -16)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -12)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -8)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 5) (const_int -4)))
+ (match_operand:SI 4 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
+ "stm%(db%)\t%5!, {%1, %2, %3, %4}"
+ [(set_attr "type" "store4")
+ (set_attr "predicable" "yes")])
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 4 "memory_operand" ""))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 5 "memory_operand" ""))
+ (set (match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 6 "memory_operand" ""))
+ (set (match_operand:SI 3 "s_register_operand" "")
+ (match_operand:SI 7 "memory_operand" ""))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_ldm_seq (operands, 4, false))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 8 "const_int_operand" ""))
+ (set (match_operand:SI 4 "memory_operand" "")
+ (match_dup 0))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 9 "const_int_operand" ""))
+ (set (match_operand:SI 5 "memory_operand" "")
+ (match_dup 1))
+ (set (match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 10 "const_int_operand" ""))
+ (set (match_operand:SI 6 "memory_operand" "")
+ (match_dup 2))
+ (set (match_operand:SI 3 "s_register_operand" "")
+ (match_operand:SI 11 "const_int_operand" ""))
+ (set (match_operand:SI 7 "memory_operand" "")
+ (match_dup 3))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_const_stm_seq (operands, 4))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 8 "const_int_operand" ""))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 9 "const_int_operand" ""))
+ (set (match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 10 "const_int_operand" ""))
+ (set (match_operand:SI 3 "s_register_operand" "")
+ (match_operand:SI 11 "const_int_operand" ""))
+ (set (match_operand:SI 4 "memory_operand" "")
+ (match_dup 0))
+ (set (match_operand:SI 5 "memory_operand" "")
+ (match_dup 1))
+ (set (match_operand:SI 6 "memory_operand" "")
+ (match_dup 2))
+ (set (match_operand:SI 7 "memory_operand" "")
+ (match_dup 3))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_const_stm_seq (operands, 4))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_peephole2
+ [(set (match_operand:SI 4 "memory_operand" "")
+ (match_operand:SI 0 "s_register_operand" ""))
+ (set (match_operand:SI 5 "memory_operand" "")
+ (match_operand:SI 1 "s_register_operand" ""))
+ (set (match_operand:SI 6 "memory_operand" "")
+ (match_operand:SI 2 "s_register_operand" ""))
+ (set (match_operand:SI 7 "memory_operand" "")
+ (match_operand:SI 3 "s_register_operand" ""))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_stm_seq (operands, 4))
+ DONE;
+ else
+ FAIL;
+})
+
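+;; Illustrative note (not generator output): the constant-store
+;; peepholes above target code such as
+;;   void init4 (int *p) { p[0] = 1; p[1] = 2; p[2] = 3; p[3] = 4; }
+;; where the constants are built in registers and stored to adjacent
+;; words, whether the moves and stores are interleaved or grouped.
+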
+(define_insn "*ldm3_ia"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_operand:SI 4 "s_register_operand" "rk")))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 4))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 8))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
+ "ldm%(ia%)\t%4, {%1, %2, %3}"
+ [(set_attr "type" "load3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*thumb_ldm3_ia"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_operand:SI 4 "s_register_operand" "l")))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 4))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 8))))])]
+ "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3"
+ "ldm%(ia%)\t%4, {%1, %2, %3}"
+ [(set_attr "type" "load3")])
+
+(define_insn "*ldm3_ia_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 4 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 4) (const_int 12)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 4)))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 4))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 8))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
+ "ldm%(ia%)\t%4!, {%1, %2, %3}"
+ [(set_attr "type" "load3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*thumb_ldm3_ia_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 4 "s_register_operand" "+&l")
+ (plus:SI (match_dup 4) (const_int 12)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 4)))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 4))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 8))))])]
+ "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4"
+ "ldm%(ia%)\t%4!, {%1, %2, %3}"
+ [(set_attr "type" "load3")])
+
+(define_insn "*stm3_ia"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (match_operand:SI 4 "s_register_operand" "rk"))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int 4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int 8)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
+ "stm%(ia%)\t%4, {%1, %2, %3}"
+ [(set_attr "type" "store3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm3_ia_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 4 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 4) (const_int 12)))
+ (set (mem:SI (match_dup 4))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int 4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int 8)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
+ "stm%(ia%)\t%4!, {%1, %2, %3}"
+ [(set_attr "type" "store3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*thumb_stm3_ia_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 4 "s_register_operand" "+&l")
+ (plus:SI (match_dup 4) (const_int 12)))
+ (set (mem:SI (match_dup 4))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int 4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int 8)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4"
+ "stm%(ia%)\t%4!, {%1, %2, %3}"
+ [(set_attr "type" "store3")])
+
+(define_insn "*ldm3_ib"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk")
+ (const_int 4))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 8))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 12))))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
+ "ldm%(ib%)\t%4, {%1, %2, %3}"
+ [(set_attr "type" "load3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm3_ib_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 4 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 4) (const_int 12)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 4))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 8))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int 12))))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
+ "ldm%(ib%)\t%4!, {%1, %2, %3}"
+ [(set_attr "type" "load3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm3_ib"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") (const_int 4)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int 8)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int 12)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
+ "stm%(ib%)\t%4, {%1, %2, %3}"
+ [(set_attr "type" "store3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm3_ib_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 4 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 4) (const_int 12)))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int 4)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int 8)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int 12)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
+ "stm%(ib%)\t%4!, {%1, %2, %3}"
+ [(set_attr "type" "store3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm3_da"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk")
+ (const_int -8))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int -4))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 4)))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
+ "ldm%(da%)\t%4, {%1, %2, %3}"
+ [(set_attr "type" "load3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm3_da_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 4 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 4) (const_int -12)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int -8))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int -4))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 4)))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
+ "ldm%(da%)\t%4!, {%1, %2, %3}"
+ [(set_attr "type" "load3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm3_da"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") (const_int -8)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int -4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (match_dup 4))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
+ "stm%(da%)\t%4, {%1, %2, %3}"
+ [(set_attr "type" "store3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm3_da_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 4 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 4) (const_int -12)))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int -8)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int -4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (match_dup 4))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
+ "stm%(da%)\t%4!, {%1, %2, %3}"
+ [(set_attr "type" "store3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm3_db"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk")
+ (const_int -12))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int -8))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int -4))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
+ "ldm%(db%)\t%4, {%1, %2, %3}"
+ [(set_attr "type" "load3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm3_db_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 4 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 4) (const_int -12)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int -12))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int -8))))
+ (set (match_operand:SI 3 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 4)
+ (const_int -4))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
+ "ldm%(db%)\t%4!, {%1, %2, %3}"
+ [(set_attr "type" "load3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm3_db"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") (const_int -12)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int -8)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int -4)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
+ "stm%(db%)\t%4, {%1, %2, %3}"
+ [(set_attr "type" "store3")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm3_db_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 4 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 4) (const_int -12)))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int -12)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int -8)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 4) (const_int -4)))
+ (match_operand:SI 3 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
+ "stm%(db%)\t%4!, {%1, %2, %3}"
+ [(set_attr "type" "store3")
+ (set_attr "predicable" "yes")])
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 3 "memory_operand" ""))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 4 "memory_operand" ""))
+ (set (match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 5 "memory_operand" ""))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_ldm_seq (operands, 3, false))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 6 "const_int_operand" ""))
+ (set (match_operand:SI 3 "memory_operand" "")
+ (match_dup 0))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 7 "const_int_operand" ""))
+ (set (match_operand:SI 4 "memory_operand" "")
+ (match_dup 1))
+ (set (match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 8 "const_int_operand" ""))
+ (set (match_operand:SI 5 "memory_operand" "")
+ (match_dup 2))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_const_stm_seq (operands, 3))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 6 "const_int_operand" ""))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 7 "const_int_operand" ""))
+ (set (match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 8 "const_int_operand" ""))
+ (set (match_operand:SI 3 "memory_operand" "")
+ (match_dup 0))
+ (set (match_operand:SI 4 "memory_operand" "")
+ (match_dup 1))
+ (set (match_operand:SI 5 "memory_operand" "")
+ (match_dup 2))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_const_stm_seq (operands, 3))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_peephole2
+ [(set (match_operand:SI 3 "memory_operand" "")
+ (match_operand:SI 0 "s_register_operand" ""))
+ (set (match_operand:SI 4 "memory_operand" "")
+ (match_operand:SI 1 "s_register_operand" ""))
+ (set (match_operand:SI 5 "memory_operand" "")
+ (match_operand:SI 2 "s_register_operand" ""))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_stm_seq (operands, 3))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_insn "*ldm2_ia"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_operand:SI 3 "s_register_operand" "rk")))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 3)
+ (const_int 4))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
+ "ldm%(ia%)\t%3, {%1, %2}"
+ [(set_attr "type" "load2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*thumb_ldm2_ia"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_operand:SI 3 "s_register_operand" "l")))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 3)
+ (const_int 4))))])]
+ "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 2"
+ "ldm%(ia%)\t%3, {%1, %2}"
+ [(set_attr "type" "load2")])
+
+(define_insn "*ldm2_ia_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 3 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 3) (const_int 8)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 3)))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 3)
+ (const_int 4))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
+ "ldm%(ia%)\t%3!, {%1, %2}"
+ [(set_attr "type" "load2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*thumb_ldm2_ia_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 3 "s_register_operand" "+&l")
+ (plus:SI (match_dup 3) (const_int 8)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 3)))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 3)
+ (const_int 4))))])]
+ "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3"
+ "ldm%(ia%)\t%3!, {%1, %2}"
+ [(set_attr "type" "load2")])
+
+(define_insn "*stm2_ia"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (match_operand:SI 3 "s_register_operand" "rk"))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 3) (const_int 4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
+ "stm%(ia%)\t%3, {%1, %2}"
+ [(set_attr "type" "store2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm2_ia_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 3 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 3) (const_int 8)))
+ (set (mem:SI (match_dup 3))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 3) (const_int 4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
+ "stm%(ia%)\t%3!, {%1, %2}"
+ [(set_attr "type" "store2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*thumb_stm2_ia_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 3 "s_register_operand" "+&l")
+ (plus:SI (match_dup 3) (const_int 8)))
+ (set (mem:SI (match_dup 3))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 3) (const_int 4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3"
+ "stm%(ia%)\t%3!, {%1, %2}"
+ [(set_attr "type" "store2")])
+
+(define_insn "*ldm2_ib"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk")
+ (const_int 4))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 3)
+ (const_int 8))))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
+ "ldm%(ib%)\t%3, {%1, %2}"
+ [(set_attr "type" "load2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm2_ib_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 3 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 3) (const_int 8)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 3)
+ (const_int 4))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 3)
+ (const_int 8))))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
+ "ldm%(ib%)\t%3!, {%1, %2}"
+ [(set_attr "type" "load2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm2_ib"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") (const_int 4)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 3) (const_int 8)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
+ "stm%(ib%)\t%3, {%1, %2}"
+ [(set_attr "type" "store2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm2_ib_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 3 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 3) (const_int 8)))
+ (set (mem:SI (plus:SI (match_dup 3) (const_int 4)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 3) (const_int 8)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
+ "stm%(ib%)\t%3!, {%1, %2}"
+ [(set_attr "type" "store2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm2_da"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk")
+ (const_int -4))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 3)))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
+ "ldm%(da%)\t%3, {%1, %2}"
+ [(set_attr "type" "load2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm2_da_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 3 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 3) (const_int -8)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 3)
+ (const_int -4))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (match_dup 3)))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
+ "ldm%(da%)\t%3!, {%1, %2}"
+ [(set_attr "type" "load2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm2_da"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") (const_int -4)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (match_dup 3))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
+ "stm%(da%)\t%3, {%1, %2}"
+ [(set_attr "type" "store2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm2_da_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 3 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 3) (const_int -8)))
+ (set (mem:SI (plus:SI (match_dup 3) (const_int -4)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (match_dup 3))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
+ "stm%(da%)\t%3!, {%1, %2}"
+ [(set_attr "type" "store2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm2_db"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk")
+ (const_int -8))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 3)
+ (const_int -4))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
+ "ldm%(db%)\t%3, {%1, %2}"
+ [(set_attr "type" "load2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*ldm2_db_update"
+ [(match_parallel 0 "load_multiple_operation"
+ [(set (match_operand:SI 3 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 3) (const_int -8)))
+ (set (match_operand:SI 1 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 3)
+ (const_int -8))))
+ (set (match_operand:SI 2 "arm_hard_register_operand" "")
+ (mem:SI (plus:SI (match_dup 3)
+ (const_int -4))))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
+ "ldm%(db%)\t%3!, {%1, %2}"
+ [(set_attr "type" "load2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm2_db"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") (const_int -8)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 3) (const_int -4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
+ "stm%(db%)\t%3, {%1, %2}"
+ [(set_attr "type" "store2")
+ (set_attr "predicable" "yes")])
+
+(define_insn "*stm2_db_update"
+ [(match_parallel 0 "store_multiple_operation"
+ [(set (match_operand:SI 3 "s_register_operand" "+&rk")
+ (plus:SI (match_dup 3) (const_int -8)))
+ (set (mem:SI (plus:SI (match_dup 3) (const_int -8)))
+ (match_operand:SI 1 "arm_hard_register_operand" ""))
+ (set (mem:SI (plus:SI (match_dup 3) (const_int -4)))
+ (match_operand:SI 2 "arm_hard_register_operand" ""))])]
+ "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
+ "stm%(db%)\t%3!, {%1, %2}"
+ [(set_attr "type" "store2")
+ (set_attr "predicable" "yes")])
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 2 "memory_operand" ""))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 3 "memory_operand" ""))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_ldm_seq (operands, 2, false))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 4 "const_int_operand" ""))
+ (set (match_operand:SI 2 "memory_operand" "")
+ (match_dup 0))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 5 "const_int_operand" ""))
+ (set (match_operand:SI 3 "memory_operand" "")
+ (match_dup 1))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_const_stm_seq (operands, 2))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 4 "const_int_operand" ""))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 5 "const_int_operand" ""))
+ (set (match_operand:SI 2 "memory_operand" "")
+ (match_dup 0))
+ (set (match_operand:SI 3 "memory_operand" "")
+ (match_dup 1))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_const_stm_seq (operands, 2))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_peephole2
+ [(set (match_operand:SI 2 "memory_operand" "")
+ (match_operand:SI 0 "s_register_operand" ""))
+ (set (match_operand:SI 3 "memory_operand" "")
+ (match_operand:SI 1 "s_register_operand" ""))]
+ ""
+ [(const_int 0)]
+{
+ if (gen_stm_seq (operands, 2))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 2 "memory_operand" ""))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 3 "memory_operand" ""))
+ (parallel
+ [(set (match_operand:SI 4 "s_register_operand" "")
+ (match_operator:SI 5 "commutative_binary_operator"
+ [(match_operand:SI 6 "s_register_operand" "")
+ (match_operand:SI 7 "s_register_operand" "")]))
+ (clobber (reg:CC CC_REGNUM))])]
+ "(((operands[6] == operands[0] && operands[7] == operands[1])
+ || (operands[7] == operands[0] && operands[6] == operands[1]))
+ && peep2_reg_dead_p (3, operands[0]) && peep2_reg_dead_p (3, operands[1]))"
+ [(parallel
+ [(set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))
+ (clobber (reg:CC CC_REGNUM))])]
+{
+ if (!gen_ldm_seq (operands, 2, true))
+ FAIL;
+})
+
+(define_peephole2
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operand:SI 2 "memory_operand" ""))
+ (set (match_operand:SI 1 "s_register_operand" "")
+ (match_operand:SI 3 "memory_operand" ""))
+ (set (match_operand:SI 4 "s_register_operand" "")
+ (match_operator:SI 5 "commutative_binary_operator"
+ [(match_operand:SI 6 "s_register_operand" "")
+ (match_operand:SI 7 "s_register_operand" "")]))]
+ "(((operands[6] == operands[0] && operands[7] == operands[1])
+ || (operands[7] == operands[0] && operands[6] == operands[1]))
+ && peep2_reg_dead_p (3, operands[0]) && peep2_reg_dead_p (3, operands[1]))"
+ [(set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]
+{
+ if (!gen_ldm_seq (operands, 2, true))
+ FAIL;
+})
+
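[Illustrative note, not part of the patch: the commutative-operator
peepholes above let a pair of loads feeding a commutative operation be
combined into an ldm even when register allocation presents the
operands in the opposite order. A source-level sketch of the shape they
target, assuming ARM compilation at -O2:

    /* The two word loads can become one ldm; the add accepts its
       inputs in either order.  */
    int
    sum2 (const int *p)
    {
      return p[0] + p[1];
    }
]
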
Index: config/arm/predicates.md
===================================================================
--- config/arm/predicates.md (revision 158639)
+++ config/arm/predicates.md (working copy)
@@ -181,6 +181,11 @@ (define_special_predicate "logical_binar
(and (match_code "ior,xor,and")
(match_test "mode == GET_MODE (op)")))
+;; True for commutative operators
+(define_special_predicate "commutative_binary_operator"
+ (and (match_code "ior,xor,and,plus")
+ (match_test "mode == GET_MODE (op)")))
+
;; True for shift operators.
(define_special_predicate "shift_operator"
(and (ior (ior (and (match_code "mult")
@@ -305,13 +310,17 @@ (define_special_predicate "load_multiple
(match_code "parallel")
{
HOST_WIDE_INT count = XVECLEN (op, 0);
- int dest_regno;
+ unsigned dest_regno;
rtx src_addr;
HOST_WIDE_INT i = 1, base = 0;
+ HOST_WIDE_INT offset = 0;
rtx elt;
+ bool addr_reg_loaded = false;
+ bool update = false;
if (count <= 1
- || GET_CODE (XVECEXP (op, 0, 0)) != SET)
+ || GET_CODE (XVECEXP (op, 0, 0)) != SET
+ || !REG_P (SET_DEST (XVECEXP (op, 0, 0))))
return false;
/* Check to see if this might be a write-back. */
@@ -319,6 +328,7 @@ (define_special_predicate "load_multiple
{
i++;
base = 1;
+ update = true;
/* Now check it more carefully. */
if (GET_CODE (SET_DEST (elt)) != REG
@@ -337,6 +347,15 @@ (define_special_predicate "load_multiple
dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, i - 1)));
src_addr = XEXP (SET_SRC (XVECEXP (op, 0, i - 1)), 0);
+ if (GET_CODE (src_addr) == PLUS)
+ {
+ if (GET_CODE (XEXP (src_addr, 1)) != CONST_INT)
+ return false;
+ offset = INTVAL (XEXP (src_addr, 1));
+ src_addr = XEXP (src_addr, 0);
+ }
+ if (!REG_P (src_addr))
+ return false;
for (; i < count; i++)
{
@@ -345,16 +364,28 @@ (define_special_predicate "load_multiple
if (GET_CODE (elt) != SET
|| GET_CODE (SET_DEST (elt)) != REG
|| GET_MODE (SET_DEST (elt)) != SImode
- || REGNO (SET_DEST (elt)) != (unsigned int)(dest_regno + i - base)
+ || REGNO (SET_DEST (elt)) <= dest_regno
|| GET_CODE (SET_SRC (elt)) != MEM
|| GET_MODE (SET_SRC (elt)) != SImode
- || GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS
- || !rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr)
- || GET_CODE (XEXP (XEXP (SET_SRC (elt), 0), 1)) != CONST_INT
- || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) != (i - base) * 4)
+ || ((GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS
+ || !rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr)
+ || GET_CODE (XEXP (XEXP (SET_SRC (elt), 0), 1)) != CONST_INT
+ || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) != offset + (i - base) * 4)
+ && (!REG_P (XEXP (SET_SRC (elt), 0))
+ || offset + (i - base) * 4 != 0)))
return false;
+ dest_regno = REGNO (SET_DEST (elt));
+ if (dest_regno == REGNO (src_addr))
+ addr_reg_loaded = true;
}
-
+ /* For Thumb-1, we only have updating instructions. If the pattern does
+ not describe an update, it must be because the address register is
+ in the list of loaded registers - on the hardware, this has the effect
+ of overriding the update. */
+ if (update && addr_reg_loaded)
+ return false;
+ if (TARGET_THUMB1)
+ return update || addr_reg_loaded;
return true;
})
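
[Illustrative note, not part of the patch: the Thumb-1 logic above can
be restated as a hypothetical helper; the real check is inline in the
predicate:

    /* Hypothetical restatement of the Thumb-1 acceptance rule.  */
    static bool
    thumb1_ldm_acceptable_p (bool update, bool addr_reg_loaded)
    {
      /* An explicit update and a load of the base register never
         coexist; the hardware lets the loaded value win.  */
      if (update && addr_reg_loaded)
        return false;
      /* Thumb-1 only has the updating form, so one of the two must be
         present in the RTL.  */
      return update || addr_reg_loaded;
    }
]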
@@ -362,9 +393,9 @@ (define_special_predicate "store_multipl
(match_code "parallel")
{
HOST_WIDE_INT count = XVECLEN (op, 0);
- int src_regno;
+ unsigned src_regno;
rtx dest_addr;
- HOST_WIDE_INT i = 1, base = 0;
+ HOST_WIDE_INT i = 1, base = 0, offset = 0;
rtx elt;
if (count <= 1
@@ -395,6 +426,16 @@ (define_special_predicate "store_multipl
src_regno = REGNO (SET_SRC (XVECEXP (op, 0, i - 1)));
dest_addr = XEXP (SET_DEST (XVECEXP (op, 0, i - 1)), 0);
+ if (GET_CODE (dest_addr) == PLUS)
+ {
+ if (GET_CODE (XEXP (dest_addr, 1)) != CONST_INT)
+ return false;
+ offset = INTVAL (XEXP (dest_addr, 1));
+ dest_addr = XEXP (dest_addr, 0);
+ }
+ if (!REG_P (dest_addr))
+ return false;
+
for (; i < count; i++)
{
elt = XVECEXP (op, 0, i);
@@ -402,14 +443,17 @@ (define_special_predicate "store_multipl
if (GET_CODE (elt) != SET
|| GET_CODE (SET_SRC (elt)) != REG
|| GET_MODE (SET_SRC (elt)) != SImode
- || REGNO (SET_SRC (elt)) != (unsigned int)(src_regno + i - base)
+ || REGNO (SET_SRC (elt)) <= src_regno
|| GET_CODE (SET_DEST (elt)) != MEM
|| GET_MODE (SET_DEST (elt)) != SImode
- || GET_CODE (XEXP (SET_DEST (elt), 0)) != PLUS
- || !rtx_equal_p (XEXP (XEXP (SET_DEST (elt), 0), 0), dest_addr)
- || GET_CODE (XEXP (XEXP (SET_DEST (elt), 0), 1)) != CONST_INT
- || INTVAL (XEXP (XEXP (SET_DEST (elt), 0), 1)) != (i - base) * 4)
+ || ((GET_CODE (XEXP (SET_DEST (elt), 0)) != PLUS
+ || !rtx_equal_p (XEXP (XEXP (SET_DEST (elt), 0), 0), dest_addr)
+ || GET_CODE (XEXP (XEXP (SET_DEST (elt), 0), 1)) != CONST_INT
+ || INTVAL (XEXP (XEXP (SET_DEST (elt), 0), 1)) != offset + (i - base) * 4)
+ && (!REG_P (XEXP (SET_DEST (elt), 0))
+ || offset + (i - base) * 4 != 0)))
return false;
+ src_regno = REGNO (SET_SRC (elt));
}
return true;
Index: config/arm/arm-ldmstm.ml
===================================================================
--- config/arm/arm-ldmstm.ml (revision 0)
+++ config/arm/arm-ldmstm.ml (revision 0)
@@ -0,0 +1,311 @@
+(* Auto-generate ARM ldm/stm patterns
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ Contributed by CodeSourcery.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3, or (at your option) any later
+ version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>.
+
+ This is an O'Caml program. The O'Caml compiler is available from:
+
+ http://caml.inria.fr/
+
+ Or from your favourite OS's friendly packaging system. Tested with version
+ 3.09.2, though other versions will probably work too.
+
+ Run with:
+ ocaml arm-ldmstm.ml >/path/to/gcc/config/arm/ldmstm.md
+*)
+
+type amode = IA | IB | DA | DB
+
+type optype = IN | OUT | INOUT
+
+let rec string_of_addrmode addrmode =
+ match addrmode with
+ IA -> "ia" | IB -> "ib" | DA -> "da" | DB -> "db"
+
+let rec initial_offset addrmode nregs =
+ match addrmode with
+ IA -> 0
+ | IB -> 4
+ | DA -> -4 * nregs + 4
+ | DB -> -4 * nregs
+
+let rec final_offset addrmode nregs =
+ match addrmode with
+ IA -> nregs * 4
+ | IB -> nregs * 4
+ | DA -> -4 * nregs
+ | DB -> -4 * nregs
+
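+(* Worked example (illustrative): for a 4-register block,
+     initial_offset IB 4 = 4     final_offset IB 4 = 16
+     initial_offset DB 4 = -16   final_offset DB 4 = -16
+   i.e. IB transfers start at base+4 and the updated base ends on the
+   last word transferred, while DB transfers occupy the 16 bytes below
+   the original base, starting at the updated base. *)
+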
+let constr thumb =
+ if thumb then "l" else "rk"
+
+let inout_constr op_type =
+ match op_type with
+ OUT -> "=&"
+ | INOUT -> "+&"
+ | IN -> ""
+
+let destreg nregs first op_type thumb =
+ if not first then
+ Printf.sprintf "(match_dup %d)" (nregs + 1)
+ else
+ Printf.sprintf ("(match_operand:SI %d \"s_register_operand\" \"%s%s\")")
+ (nregs + 1) (inout_constr op_type) (constr thumb)
+
+let write_ldm_set thumb nregs offset opnr first =
+ let indent = " " in
+ Printf.printf "%s" (if first then " [" else indent);
+ Printf.printf "(set (match_operand:SI %d \"arm_hard_register_operand\" \"\")\n" opnr;
+ Printf.printf "%s (mem:SI " indent;
+ begin if offset != 0 then Printf.printf "(plus:SI " end;
+ Printf.printf "%s" (destreg nregs first IN thumb);
+ begin if offset != 0 then Printf.printf "\n%s (const_int %d))" indent offset end;
+ Printf.printf "))"
+
+let write_stm_set thumb nregs offset opnr first =
+ let indent = " " in
+ Printf.printf "%s" (if first then " [" else indent);
+ Printf.printf "(set (mem:SI ";
+ begin if offset != 0 then Printf.printf "(plus:SI " end;
+ Printf.printf "%s" (destreg nregs first IN thumb);
+ begin if offset != 0 then Printf.printf " (const_int %d))" offset end;
+ Printf.printf ")\n%s (match_operand:SI %d \"arm_hard_register_operand\" \"\"))" indent opnr
+
+let write_ldm_peep_set extra_indent nregs opnr first =
+ let indent = " " ^ extra_indent in
+ Printf.printf "%s" (if first then extra_indent ^ " [" else indent);
+ Printf.printf "(set (match_operand:SI %d \"s_register_operand\" \"\")\n" opnr;
+ Printf.printf "%s (match_operand:SI %d \"memory_operand\" \"\"))" indent (nregs + opnr)
+
+let write_stm_peep_set extra_indent nregs opnr first =
+ let indent = " " ^ extra_indent in
+ Printf.printf "%s" (if first then extra_indent ^ " [" else indent);
+ Printf.printf "(set (match_operand:SI %d \"memory_operand\" \"\")\n" (nregs + opnr);
+ Printf.printf "%s (match_operand:SI %d \"s_register_operand\" \"\"))" indent opnr
+
+let write_any_load optype nregs opnr first =
+ let indent = " " in
+ Printf.printf "%s" (if first then " [" else indent);
+ Printf.printf "(set (match_operand:SI %d \"s_register_operand\" \"\")\n" opnr;
+ Printf.printf "%s (match_operand:SI %d \"%s\" \"\"))" indent (nregs * 2 + opnr) optype
+
+let write_const_store nregs opnr first =
+ let indent = " " in
+ Printf.printf "%s(set (match_operand:SI %d \"memory_operand\" \"\")\n" indent (nregs + opnr);
+ Printf.printf "%s (match_dup %d))" indent opnr
+
+let write_const_stm_peep_set nregs opnr first =
+ write_any_load "const_int_operand" nregs opnr first;
+ Printf.printf "\n";
+ write_const_store nregs opnr false
+
+
+let rec write_pat_sets func opnr offset first n_left =
+ func offset opnr first;
+ begin
+ if n_left > 1 then begin
+ Printf.printf "\n";
+ write_pat_sets func (opnr + 1) (offset + 4) false (n_left - 1);
+ end else
+ Printf.printf "]"
+ end
+
+let rec write_peep_sets func opnr first n_left =
+ func opnr first;
+ begin
+ if n_left > 1 then begin
+ Printf.printf "\n";
+ write_peep_sets func (opnr + 1) false (n_left - 1);
+ end
+ end
+
+let can_thumb addrmode update is_store =
+ match addrmode, update, is_store with
+ (* Thumb1 mode only supports IA with update. However, for LDMIA,
+ if the address register also appears in the list of loaded
+ registers, the loaded value overrides the writeback, so the RTL
+ pattern for such an insn does not have an update. We check in the
+ match_parallel predicate that the condition described above is
+ met. *)
+ IA, _, false -> true
+ | IA, true, true -> true
+ | _ -> false
+
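+(* For instance (illustrative): can_thumb IA false false = true, because
+   a non-updating LDMIA is representable on Thumb-1 when the base
+   register appears in the register list, while can_thumb DB true false
+   = false, since Thumb-1 has no ldmdb. *)
+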
+let target addrmode thumb =
+ match addrmode, thumb with
+ IA, true -> "TARGET_THUMB1"
+ | IA, false -> "TARGET_32BIT"
+ | DB, false -> "TARGET_32BIT"
+ | _, false -> "TARGET_ARM"
+
+let write_pattern_1 name ls addrmode nregs write_set_fn update thumb =
+ let astr = string_of_addrmode addrmode in
+ Printf.printf "(define_insn \"*%s%s%d_%s%s\"\n"
+ (if thumb then "thumb_" else "") name nregs astr
+ (if update then "_update" else "");
+ Printf.printf " [(match_parallel 0 \"%s_multiple_operation\"\n" ls;
+ begin
+ if update then begin
+ Printf.printf " [(set %s\n (plus:SI %s"
+ (destreg nregs true INOUT thumb) (destreg nregs false IN thumb);
+ Printf.printf " (const_int %d)))\n"
+ (final_offset addrmode nregs)
+ end
+ end;
+ write_pat_sets
+ (write_set_fn thumb nregs) 1
+ (initial_offset addrmode nregs)
+ (not update) nregs;
+ Printf.printf ")]\n \"%s && XVECLEN (operands[0], 0) == %d\"\n"
+ (target addrmode thumb)
+ (if update then nregs + 1 else nregs);
+ Printf.printf " \"%s%%(%s%%)\\t%%%d%s, {"
+ name astr (nregs + 1) (if update then "!" else "");
+ for n = 1 to nregs; do
+ Printf.printf "%%%d%s" n (if n < nregs then ", " else "")
+ done;
+ Printf.printf "}\"\n";
+ Printf.printf " [(set_attr \"type\" \"%s%d\")" ls nregs;
+ begin if not thumb then
+ Printf.printf "\n (set_attr \"predicable\" \"yes\")";
+ end;
+ Printf.printf "])\n\n"
+
+let write_ldm_pattern addrmode nregs update =
+ write_pattern_1 "ldm" "load" addrmode nregs write_ldm_set update false;
+ begin if can_thumb addrmode update false then
+ write_pattern_1 "ldm" "load" addrmode nregs write_ldm_set update true;
+ end
+
+let write_stm_pattern addrmode nregs update =
+ write_pattern_1 "stm" "store" addrmode nregs write_stm_set update false;
+ begin if can_thumb addrmode update true then
+ write_pattern_1 "stm" "store" addrmode nregs write_stm_set update true;
+ end
+
+let write_ldm_commutative_peephole thumb =
+ let nregs = 2 in
+ Printf.printf "(define_peephole2\n";
+ write_peep_sets (write_ldm_peep_set "" nregs) 0 true nregs;
+ let indent = " " in
+ if thumb then begin
+ Printf.printf "\n%s(set (match_operand:SI %d \"s_register_operand\" \"\")\n" indent (nregs * 2);
+ Printf.printf "%s (match_operator:SI %d \"commutative_binary_operator\"\n" indent (nregs * 2 + 1);
+ Printf.printf "%s [(match_operand:SI %d \"s_register_operand\" \"\")\n" indent (nregs * 2 + 2);
+ Printf.printf "%s (match_operand:SI %d \"s_register_operand\" \"\")]))]\n" indent (nregs * 2 + 3)
+ end else begin
+ Printf.printf "\n%s(parallel\n" indent;
+ Printf.printf "%s [(set (match_operand:SI %d \"s_register_operand\" \"\")\n" indent (nregs * 2);
+ Printf.printf "%s (match_operator:SI %d \"commutative_binary_operator\"\n" indent (nregs * 2 + 1);
+ Printf.printf "%s [(match_operand:SI %d \"s_register_operand\" \"\")\n" indent (nregs * 2 + 2);
+ Printf.printf "%s (match_operand:SI %d \"s_register_operand\" \"\")]))\n" indent (nregs * 2 + 3);
+ Printf.printf "%s (clobber (reg:CC CC_REGNUM))])]\n" indent
+ end;
+ Printf.printf " \"(((operands[%d] == operands[0] && operands[%d] == operands[1])\n" (nregs * 2 + 2) (nregs * 2 + 3);
+ Printf.printf " || (operands[%d] == operands[0] && operands[%d] == operands[1]))\n" (nregs * 2 + 3) (nregs * 2 + 2);
+ Printf.printf " && peep2_reg_dead_p (%d, operands[0]) && peep2_reg_dead_p (%d, operands[1]))\"\n" (nregs + 1) (nregs + 1);
+ begin
+ if thumb then
+ Printf.printf " [(set (match_dup %d) (match_op_dup %d [(match_dup %d) (match_dup %d)]))]\n"
+ (nregs * 2) (nregs * 2 + 1) (nregs * 2 + 2) (nregs * 2 + 3)
+ else begin
+ Printf.printf " [(parallel\n";
+ Printf.printf " [(set (match_dup %d) (match_op_dup %d [(match_dup %d) (match_dup %d)]))\n"
+ (nregs * 2) (nregs * 2 + 1) (nregs * 2 + 2) (nregs * 2 + 3);
+ Printf.printf " (clobber (reg:CC CC_REGNUM))])]\n"
+ end
+ end;
+ Printf.printf "{\n if (!gen_ldm_seq (operands, %d, true))\n FAIL;\n" nregs;
+ Printf.printf "})\n\n"
+
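+(* The plain peepholes match nregs independent loads (or stores), replace
+   them with (const_int 0) and leave gen_ldm_seq / gen_stm_seq to emit the
+   multiple-access insn; FAIL backs the transformation out when no
+   profitable sequence exists. *)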
+let write_ldm_peephole nregs =
+ Printf.printf "(define_peephole2\n";
+ write_peep_sets (write_ldm_peep_set "" nregs) 0 true nregs;
+ Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n";
+ Printf.printf " if (gen_ldm_seq (operands, %d, false))\n DONE;\n else\n FAIL;\n})\n\n" nregs
+
+let write_stm_peephole nregs =
+ Printf.printf "(define_peephole2\n";
+ write_peep_sets (write_stm_peep_set "" nregs) 0 true nregs;
+ Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n";
+ Printf.printf " if (gen_stm_seq (operands, %d))\n DONE;\n else\n FAIL;\n})\n\n" nregs
+
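+(* Two peepholes for stores of constants: variant A matches the constant
+   stores directly via write_const_stm_peep_set, while variant B matches
+   loads of constants into registers (write_any_load with
+   "const_int_operand") followed by stores of those registers.  Both expand
+   through gen_const_stm_seq. *)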
+let write_const_stm_peephole_a nregs =
+ Printf.printf "(define_peephole2\n";
+ write_peep_sets (write_const_stm_peep_set nregs) 0 true nregs;
+ Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n";
+ Printf.printf " if (gen_const_stm_seq (operands, %d))\n DONE;\n else\n FAIL;\n})\n\n" nregs
+
+let write_const_stm_peephole_b nregs =
+ Printf.printf "(define_peephole2\n";
+ write_peep_sets (write_any_load "const_int_operand" nregs) 0 true nregs;
+ Printf.printf "\n";
+ write_peep_sets (write_const_store nregs) 0 false nregs;
+ Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n";
+ Printf.printf " if (gen_const_stm_seq (operands, %d))\n DONE;\n else\n FAIL;\n})\n\n" nregs
+
+let patterns () =
+ let addrmodes = [ IA; IB; DA; DB ] in
+ let sizes = [ 4; 3; 2] in
+ List.iter
+ (fun n ->
+ List.iter
+ (fun addrmode ->
+ write_ldm_pattern addrmode n false;
+ write_ldm_pattern addrmode n true;
+ write_stm_pattern addrmode n false;
+ write_stm_pattern addrmode n true)
+ addrmodes;
+ write_ldm_peephole n;
+ write_const_stm_peephole_a n;
+ write_const_stm_peephole_b n;
+      write_stm_peephole n)
+ sizes;
+ write_ldm_commutative_peephole false;
+ write_ldm_commutative_peephole true
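+
+(* In total this emits 4 addressing modes x 3 sizes x {ldm,stm}
+   x {plain,update} = 48 ARM define_insns (plus Thumb-1 variants where
+   can_thumb allows), twelve peephole2s and the two commutative-load
+   peepholes. *)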
+
+let print_lines = List.iter (fun s -> Format.printf "%s@\n" s)
+
+(* Do it. *)
+
+let _ =
+ print_lines [
+"/* ARM ldm/stm instruction patterns. This file was automatically generated";
+" using arm-ldmstm.ml. Please do not edit manually.";
+"";
+" Copyright (C) 2010 Free Software Foundation, Inc.";
+" Contributed by CodeSourcery.";
+"";
+" This file is part of GCC.";
+"";
+" GCC is free software; you can redistribute it and/or modify it";
+" under the terms of the GNU General Public License as published";
+" by the Free Software Foundation; either version 3, or (at your";
+" option) any later version.";
+"";
+" GCC is distributed in the hope that it will be useful, but WITHOUT";
+" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY";
+" or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public";
+" License for more details.";
+"";
+" You should have received a copy of the GNU General Public License and";
+" a copy of the GCC Runtime Library Exception along with this program;";
+" see the files COPYING3 and COPYING.RUNTIME respectively. If not, see";
+" . */";
+""];
+  patterns ()
Index: config/arm/arm.md
===================================================================
--- config/arm/arm.md (revision 158639)
+++ config/arm/arm.md (working copy)
@@ -6257,7 +6257,7 @@ (define_expand "movxf"
;; load- and store-multiple insns
;; The arm can load/store any set of registers, provided that they are in
-;; ascending order; but that is beyond GCC so stick with what it knows.
+;; ascending order, but these expanders assume a contiguous set.
(define_expand "load_multiple"
[(match_par_dup 3 [(set (match_operand:SI 0 "" "")
@@ -6278,126 +6278,12 @@ (define_expand "load_multiple"
FAIL;
operands[3]
- = arm_gen_load_multiple (REGNO (operands[0]), INTVAL (operands[2]),
+ = arm_gen_load_multiple (arm_regs_in_sequence + REGNO (operands[0]),
+ INTVAL (operands[2]),
force_reg (SImode, XEXP (operands[1], 0)),
- TRUE, FALSE, operands[1], &offset);
+ FALSE, operands[1], &offset);
})
-;; Load multiple with write-back
-
-(define_insn "*ldmsi_postinc4"
- [(match_parallel 0 "load_multiple_operation"
- [(set (match_operand:SI 1 "s_register_operand" "=r")
- (plus:SI (match_operand:SI 2 "s_register_operand" "1")
- (const_int 16)))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
- (mem:SI (match_dup 2)))
- (set (match_operand:SI 4 "arm_hard_register_operand" "")
- (mem:SI (plus:SI (match_dup 2) (const_int 4))))
- (set (match_operand:SI 5 "arm_hard_register_operand" "")
- (mem:SI (plus:SI (match_dup 2) (const_int 8))))
- (set (match_operand:SI 6 "arm_hard_register_operand" "")
- (mem:SI (plus:SI (match_dup 2) (const_int 12))))])]
- "TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
- "ldm%(ia%)\\t%1!, {%3, %4, %5, %6}"
- [(set_attr "type" "load4")
- (set_attr "predicable" "yes")]
-)
-
-(define_insn "*ldmsi_postinc4_thumb1"
- [(match_parallel 0 "load_multiple_operation"
- [(set (match_operand:SI 1 "s_register_operand" "=l")
- (plus:SI (match_operand:SI 2 "s_register_operand" "1")
- (const_int 16)))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
- (mem:SI (match_dup 2)))
- (set (match_operand:SI 4 "arm_hard_register_operand" "")
- (mem:SI (plus:SI (match_dup 2) (const_int 4))))
- (set (match_operand:SI 5 "arm_hard_register_operand" "")
- (mem:SI (plus:SI (match_dup 2) (const_int 8))))
- (set (match_operand:SI 6 "arm_hard_register_operand" "")
- (mem:SI (plus:SI (match_dup 2) (const_int 12))))])]
- "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 5"
- "ldmia\\t%1!, {%3, %4, %5, %6}"
- [(set_attr "type" "load4")]
-)
-
-(define_insn "*ldmsi_postinc3"
- [(match_parallel 0 "load_multiple_operation"
- [(set (match_operand:SI 1 "s_register_operand" "=r")
- (plus:SI (match_operand:SI 2 "s_register_operand" "1")
- (const_int 12)))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
- (mem:SI (match_dup 2)))
- (set (match_operand:SI 4 "arm_hard_register_operand" "")
- (mem:SI (plus:SI (match_dup 2) (const_int 4))))
- (set (match_operand:SI 5 "arm_hard_register_operand" "")
- (mem:SI (plus:SI (match_dup 2) (const_int 8))))])]
- "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
- "ldm%(ia%)\\t%1!, {%3, %4, %5}"
- [(set_attr "type" "load3")
- (set_attr "predicable" "yes")]
-)
-
-(define_insn "*ldmsi_postinc2"
- [(match_parallel 0 "load_multiple_operation"
- [(set (match_operand:SI 1 "s_register_operand" "=r")
- (plus:SI (match_operand:SI 2 "s_register_operand" "1")
- (const_int 8)))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
- (mem:SI (match_dup 2)))
- (set (match_operand:SI 4 "arm_hard_register_operand" "")
- (mem:SI (plus:SI (match_dup 2) (const_int 4))))])]
- "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
- "ldm%(ia%)\\t%1!, {%3, %4}"
- [(set_attr "type" "load2")
- (set_attr "predicable" "yes")]
-)
-
-;; Ordinary load multiple
-
-(define_insn "*ldmsi4"
- [(match_parallel 0 "load_multiple_operation"
- [(set (match_operand:SI 2 "arm_hard_register_operand" "")
- (mem:SI (match_operand:SI 1 "s_register_operand" "r")))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
- (mem:SI (plus:SI (match_dup 1) (const_int 4))))
- (set (match_operand:SI 4 "arm_hard_register_operand" "")
- (mem:SI (plus:SI (match_dup 1) (const_int 8))))
- (set (match_operand:SI 5 "arm_hard_register_operand" "")
- (mem:SI (plus:SI (match_dup 1) (const_int 12))))])]
- "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
- "ldm%(ia%)\\t%1, {%2, %3, %4, %5}"
- [(set_attr "type" "load4")
- (set_attr "predicable" "yes")]
-)
-
-(define_insn "*ldmsi3"
- [(match_parallel 0 "load_multiple_operation"
- [(set (match_operand:SI 2 "arm_hard_register_operand" "")
- (mem:SI (match_operand:SI 1 "s_register_operand" "r")))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
- (mem:SI (plus:SI (match_dup 1) (const_int 4))))
- (set (match_operand:SI 4 "arm_hard_register_operand" "")
- (mem:SI (plus:SI (match_dup 1) (const_int 8))))])]
- "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
- "ldm%(ia%)\\t%1, {%2, %3, %4}"
- [(set_attr "type" "load3")
- (set_attr "predicable" "yes")]
-)
-
-(define_insn "*ldmsi2"
- [(match_parallel 0 "load_multiple_operation"
- [(set (match_operand:SI 2 "arm_hard_register_operand" "")
- (mem:SI (match_operand:SI 1 "s_register_operand" "r")))
- (set (match_operand:SI 3 "arm_hard_register_operand" "")
- (mem:SI (plus:SI (match_dup 1) (const_int 4))))])]
- "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
- "ldm%(ia%)\\t%1, {%2, %3}"
- [(set_attr "type" "load2")
- (set_attr "predicable" "yes")]
-)
-
(define_expand "store_multiple"
[(match_par_dup 3 [(set (match_operand:SI 0 "" "")
(match_operand:SI 1 "" ""))
@@ -6417,125 +6303,12 @@ (define_expand "store_multiple"
FAIL;
operands[3]
- = arm_gen_store_multiple (REGNO (operands[1]), INTVAL (operands[2]),
+ = arm_gen_store_multiple (arm_regs_in_sequence + REGNO (operands[1]),
+ INTVAL (operands[2]),
force_reg (SImode, XEXP (operands[0], 0)),
- TRUE, FALSE, operands[0], &offset);
+ FALSE, operands[0], &offset);
})
-;; Store multiple with write-back
-
-(define_insn "*stmsi_postinc4"
- [(match_parallel 0 "store_multiple_operation"
- [(set (match_operand:SI 1 "s_register_operand" "=r")
- (plus:SI (match_operand:SI 2 "s_register_operand" "1")
- (const_int 16)))
- (set (mem:SI (match_dup 2))
- (match_operand:SI 3 "arm_hard_register_operand" ""))
- (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
- (match_operand:SI 4 "arm_hard_register_operand" ""))
- (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
- (match_operand:SI 5 "arm_hard_register_operand" ""))
- (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
- (match_operand:SI 6 "arm_hard_register_operand" ""))])]
- "TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
- "stm%(ia%)\\t%1!, {%3, %4, %5, %6}"
- [(set_attr "predicable" "yes")
- (set_attr "type" "store4")]
-)
-
-(define_insn "*stmsi_postinc4_thumb1"
- [(match_parallel 0 "store_multiple_operation"
- [(set (match_operand:SI 1 "s_register_operand" "=l")
- (plus:SI (match_operand:SI 2 "s_register_operand" "1")
- (const_int 16)))
- (set (mem:SI (match_dup 2))
- (match_operand:SI 3 "arm_hard_register_operand" ""))
- (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
- (match_operand:SI 4 "arm_hard_register_operand" ""))
- (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
- (match_operand:SI 5 "arm_hard_register_operand" ""))
- (set (mem:SI (plus:SI (match_dup 2) (const_int 12)))
- (match_operand:SI 6 "arm_hard_register_operand" ""))])]
- "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 5"
- "stmia\\t%1!, {%3, %4, %5, %6}"
- [(set_attr "type" "store4")]
-)
-
-(define_insn "*stmsi_postinc3"
- [(match_parallel 0 "store_multiple_operation"
- [(set (match_operand:SI 1 "s_register_operand" "=r")
- (plus:SI (match_operand:SI 2 "s_register_operand" "1")
- (const_int 12)))
- (set (mem:SI (match_dup 2))
- (match_operand:SI 3 "arm_hard_register_operand" ""))
- (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
- (match_operand:SI 4 "arm_hard_register_operand" ""))
- (set (mem:SI (plus:SI (match_dup 2) (const_int 8)))
- (match_operand:SI 5 "arm_hard_register_operand" ""))])]
- "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
- "stm%(ia%)\\t%1!, {%3, %4, %5}"
- [(set_attr "predicable" "yes")
- (set_attr "type" "store3")]
-)
-
-(define_insn "*stmsi_postinc2"
- [(match_parallel 0 "store_multiple_operation"
- [(set (match_operand:SI 1 "s_register_operand" "=r")
- (plus:SI (match_operand:SI 2 "s_register_operand" "1")
- (const_int 8)))
- (set (mem:SI (match_dup 2))
- (match_operand:SI 3 "arm_hard_register_operand" ""))
- (set (mem:SI (plus:SI (match_dup 2) (const_int 4)))
- (match_operand:SI 4 "arm_hard_register_operand" ""))])]
- "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
- "stm%(ia%)\\t%1!, {%3, %4}"
- [(set_attr "predicable" "yes")
- (set_attr "type" "store2")]
-)
-
-;; Ordinary store multiple
-
-(define_insn "*stmsi4"
- [(match_parallel 0 "store_multiple_operation"
- [(set (mem:SI (match_operand:SI 1 "s_register_operand" "r"))
- (match_operand:SI 2 "arm_hard_register_operand" ""))
- (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
- (match_operand:SI 3 "arm_hard_register_operand" ""))
- (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
- (match_operand:SI 4 "arm_hard_register_operand" ""))
- (set (mem:SI (plus:SI (match_dup 1) (const_int 12)))
- (match_operand:SI 5 "arm_hard_register_operand" ""))])]
- "TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
- "stm%(ia%)\\t%1, {%2, %3, %4, %5}"
- [(set_attr "predicable" "yes")
- (set_attr "type" "store4")]
-)
-
-(define_insn "*stmsi3"
- [(match_parallel 0 "store_multiple_operation"
- [(set (mem:SI (match_operand:SI 1 "s_register_operand" "r"))
- (match_operand:SI 2 "arm_hard_register_operand" ""))
- (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
- (match_operand:SI 3 "arm_hard_register_operand" ""))
- (set (mem:SI (plus:SI (match_dup 1) (const_int 8)))
- (match_operand:SI 4 "arm_hard_register_operand" ""))])]
- "TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
- "stm%(ia%)\\t%1, {%2, %3, %4}"
- [(set_attr "predicable" "yes")
- (set_attr "type" "store3")]
-)
-
-(define_insn "*stmsi2"
- [(match_parallel 0 "store_multiple_operation"
- [(set (mem:SI (match_operand:SI 1 "s_register_operand" "r"))
- (match_operand:SI 2 "arm_hard_register_operand" ""))
- (set (mem:SI (plus:SI (match_dup 1) (const_int 4)))
- (match_operand:SI 3 "arm_hard_register_operand" ""))])]
- "TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
- "stm%(ia%)\\t%1, {%2, %3}"
- [(set_attr "predicable" "yes")
- (set_attr "type" "store2")]
-)
;; Move a block of memory if it is word aligned and MORE than 2 words long.
;; We could let this apply for blocks of less than this, but it clobbers so
@@ -8894,8 +8667,8 @@ (define_expand "untyped_call"
if (REGNO (reg) == R0_REGNUM)
{
/* On thumb we have to use a write-back instruction. */
- emit_insn (arm_gen_store_multiple (R0_REGNUM, 4, addr, TRUE,
- TARGET_THUMB ? TRUE : FALSE, mem, &offset));
+ emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, addr,
+ TARGET_THUMB ? TRUE : FALSE, mem, &offset));
size = TARGET_ARM ? 16 : 0;
}
else
@@ -8941,8 +8714,8 @@ (define_expand "untyped_return"
if (REGNO (reg) == R0_REGNUM)
{
/* On thumb we have to use a write-back instruction. */
- emit_insn (arm_gen_load_multiple (R0_REGNUM, 4, addr, TRUE,
- TARGET_THUMB ? TRUE : FALSE, mem, &offset));
+ emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, addr,
+ TARGET_THUMB ? TRUE : FALSE, mem, &offset));
size = TARGET_ARM ? 16 : 0;
}
else
@@ -10459,87 +10232,6 @@ (define_peephole2
""
)
-; Peepholes to spot possible load- and store-multiples, if the ordering is
-; reversed, check that the memory references aren't volatile.
-
-(define_peephole
- [(set (match_operand:SI 0 "s_register_operand" "=rk")
- (match_operand:SI 4 "memory_operand" "m"))
- (set (match_operand:SI 1 "s_register_operand" "=rk")
- (match_operand:SI 5 "memory_operand" "m"))
- (set (match_operand:SI 2 "s_register_operand" "=rk")
- (match_operand:SI 6 "memory_operand" "m"))
- (set (match_operand:SI 3 "s_register_operand" "=rk")
- (match_operand:SI 7 "memory_operand" "m"))]
- "TARGET_ARM && load_multiple_sequence (operands, 4, NULL, NULL, NULL)"
- "*
- return emit_ldm_seq (operands, 4);
- "
-)
-
-(define_peephole
- [(set (match_operand:SI 0 "s_register_operand" "=rk")
- (match_operand:SI 3 "memory_operand" "m"))
- (set (match_operand:SI 1 "s_register_operand" "=rk")
- (match_operand:SI 4 "memory_operand" "m"))
- (set (match_operand:SI 2 "s_register_operand" "=rk")
- (match_operand:SI 5 "memory_operand" "m"))]
- "TARGET_ARM && load_multiple_sequence (operands, 3, NULL, NULL, NULL)"
- "*
- return emit_ldm_seq (operands, 3);
- "
-)
-
-(define_peephole
- [(set (match_operand:SI 0 "s_register_operand" "=rk")
- (match_operand:SI 2 "memory_operand" "m"))
- (set (match_operand:SI 1 "s_register_operand" "=rk")
- (match_operand:SI 3 "memory_operand" "m"))]
- "TARGET_ARM && load_multiple_sequence (operands, 2, NULL, NULL, NULL)"
- "*
- return emit_ldm_seq (operands, 2);
- "
-)
-
-(define_peephole
- [(set (match_operand:SI 4 "memory_operand" "=m")
- (match_operand:SI 0 "s_register_operand" "rk"))
- (set (match_operand:SI 5 "memory_operand" "=m")
- (match_operand:SI 1 "s_register_operand" "rk"))
- (set (match_operand:SI 6 "memory_operand" "=m")
- (match_operand:SI 2 "s_register_operand" "rk"))
- (set (match_operand:SI 7 "memory_operand" "=m")
- (match_operand:SI 3 "s_register_operand" "rk"))]
- "TARGET_ARM && store_multiple_sequence (operands, 4, NULL, NULL, NULL)"
- "*
- return emit_stm_seq (operands, 4);
- "
-)
-
-(define_peephole
- [(set (match_operand:SI 3 "memory_operand" "=m")
- (match_operand:SI 0 "s_register_operand" "rk"))
- (set (match_operand:SI 4 "memory_operand" "=m")
- (match_operand:SI 1 "s_register_operand" "rk"))
- (set (match_operand:SI 5 "memory_operand" "=m")
- (match_operand:SI 2 "s_register_operand" "rk"))]
- "TARGET_ARM && store_multiple_sequence (operands, 3, NULL, NULL, NULL)"
- "*
- return emit_stm_seq (operands, 3);
- "
-)
-
-(define_peephole
- [(set (match_operand:SI 2 "memory_operand" "=m")
- (match_operand:SI 0 "s_register_operand" "rk"))
- (set (match_operand:SI 3 "memory_operand" "=m")
- (match_operand:SI 1 "s_register_operand" "rk"))]
- "TARGET_ARM && store_multiple_sequence (operands, 2, NULL, NULL, NULL)"
- "*
- return emit_stm_seq (operands, 2);
- "
-)
-
(define_split
[(set (match_operand:SI 0 "s_register_operand" "")
(and:SI (ge:SI (match_operand:SI 1 "s_register_operand" "")
@@ -11323,6 +11015,8 @@ (define_expand "bswapsi2"
"
)
+;; Load the load/store multiple patterns
+(include "ldmstm.md")
;; Load the FPA co-processor patterns
(include "fpa.md")
;; Load the Maverick co-processor patterns