[PATCH, ARM] Low interrupt latency support (avoiding ldm/stm)
- From: Julian Brown <julian at codesourcery dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: rearnsha at arm dot com, paul at codesourcery dot com
- Date: Fri, 27 Nov 2009 17:50:25 +0000
- Subject: [PATCH, ARM] Low interrupt latency support (avoiding ldm/stm)
Hi,
This patch (originally by Vladimir Prus) adds support for reducing
interrupt latency by avoiding LDM/STM instructions, which may not be
interruptible mid-operation on some (all?) cores. The behaviour is
controlled by a new compilation option (-mlow-irq-latency), since code
without multi-register loads/stores will generally be both larger and slower.
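As an illustration of the trade-off, here is a sketch (register choice
illustrative, not taken from actual compiler output) of a function
epilogue with and without the new option:

    @ default: a single multi-register load, not interruptible mid-operation
    ldmfd   sp!, {r4, r5, pc}

    @ with -mlow-irq-latency: one register per instruction, so an interrupt
    @ can be taken between any two of the loads
    ldr     r4, [sp], #4
    ldr     r5, [sp], #4
    ldr     pc, [sp], #4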
LDM/STM instructions are avoided both in generated code and in a couple
of run-time library routines. For best results these library routines
need to be compiled with -mlow-irq-latency, perhaps as a separate multilib.
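In the library code the switch is made by the do_push/do_pop assembler
macros in lib1funcs.asm; roughly (a sketch based on the macros in the
patch below), an invocation expands along these lines:

    @ do_push (r4, r5, lr) without __low_irq_latency__ defined:
    stmfd   sp!, {r4, r5, lr}

    @ do_push (r4, r5, lr) with __low_irq_latency__ defined:
    str     lr, [sp, #-4]!
    str     r5, [sp, #-4]!
    str     r4, [sp, #-4]!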
Tested with no regressions with a cross-compiler to ARM EABI (with
-mlow-irq-latency enabled for the "after" run), except for tests which
force Thumb mode.
OK to apply?
Julian
ChangeLog
Vladimir Prus <vladimir@codesourcery.com>
gcc/
* config/arm/arm.c (arm_override_options): Warn if -mlow-irq-latency
is specified in Thumb mode.
(load_multiple_sequence): Return 0 if low irq latency is requested.
(store_multiple_sequence): Likewise.
(arm_gen_load_multiple): Load registers one-by-one if low irq latency
is requested.
(arm_gen_store_multiple): Likewise.
(vfp_output_fldmd): When low_irq_latency is nonzero, pop each
register separately.
(vfp_emit_fstmd): When low_irq_latency is nonzero, save each register
separately.
(arm_get_vfp_saved_size): Adjust saved register size calculation for
the above changes.
(print_pop_reg_by_ldr): New.
(arm_output_epilogue): Use print_pop_reg_by_ldr when low irq latency
is requested.
(emit_multi_reg_push): Push registers separately if low irq latency
is requested.
* config/arm/arm.h (TARGET_CPU_CPP_BUILTINS): Set __low_irq_latency__.
(low_irq_latency): Define.
* config/arm/lib1funcs.asm (do_pop, do_push): Define as variadic
macros. When __low_irq_latency__ is defined, push and pop registers
individually.
(div0): Use the new do_push syntax.
* config/arm/ieee754-df.S: Adjust do_push/do_pop usage to the new syntax.
* config/arm/ieee754-sf.S: Likewise.
* config/arm/bpabi.S: Likewise.
* config/arm/arm.opt (mlow-irq-latency): New option.
* config/arm/predicates.md (load_multiple_operation): Return false if
low irq latency is requested.
(store_multiple_operation): Likewise.
* config/arm/arm.md (movmemqi): Disable when low irq latency is
requested.
* doc/invoke.texi (-mlow-irq-latency): Add documentation.
Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi (revision 154712)
+++ gcc/doc/invoke.texi (working copy)
@@ -469,6 +469,7 @@ Objective-C and Objective-C++ Dialects}.
-mtpcs-frame -mtpcs-leaf-frame @gol
-mcaller-super-interworking -mcallee-super-interworking @gol
-mtp=@var{name} @gol
+-mlow-irq-latency @gol
-mword-relocations @gol
-mfix-cortex-m3-ldrd}
@@ -9419,6 +9420,12 @@ be as many clauses as you need. This ma
@code{,}, @code{!}, @code{|}, and @code{*} as needed.
+@item -mlow-irq-latency
+@opindex mlow-irq-latency
+Avoid instructions which increase the worst-case interrupt latency, such
+as multiple-register loads and stores, when generating code. This can
+increase code size and reduce performance. The option is off by default.
+
@end table
The conditional text @code{X} in a %@{@code{S}:@code{X}@} or similar
Index: gcc/config/arm/ieee754-df.S
===================================================================
--- gcc/config/arm/ieee754-df.S (revision 154712)
+++ gcc/config/arm/ieee754-df.S (working copy)
@@ -83,7 +83,7 @@ ARM_FUNC_ALIAS aeabi_dsub subdf3
ARM_FUNC_START adddf3
ARM_FUNC_ALIAS aeabi_dadd adddf3
-1: do_push {r4, r5, lr}
+1: do_push (r4, r5, lr)
@ Look for zeroes, equal values, INF, or NAN.
shift1 lsl, r4, xh, #1
@@ -427,7 +427,7 @@ ARM_FUNC_ALIAS aeabi_ui2d floatunsidf
do_it eq, t
moveq r1, #0
RETc(eq)
- do_push {r4, r5, lr}
+ do_push (r4, r5, lr)
mov r4, #0x400 @ initial exponent
add r4, r4, #(52-1 - 1)
mov r5, #0 @ sign bit is 0
@@ -447,7 +447,7 @@ ARM_FUNC_ALIAS aeabi_i2d floatsidf
do_it eq, t
moveq r1, #0
RETc(eq)
- do_push {r4, r5, lr}
+ do_push (r4, r5, lr)
mov r4, #0x400 @ initial exponent
add r4, r4, #(52-1 - 1)
ands r5, r0, #0x80000000 @ sign bit in r5
@@ -481,7 +481,7 @@ ARM_FUNC_ALIAS aeabi_f2d extendsfdf2
RETc(eq) @ we are done already.
@ value was denormalized. We can normalize it now.
- do_push {r4, r5, lr}
+ do_push (r4, r5, lr)
mov r4, #0x380 @ setup corresponding exponent
and r5, xh, #0x80000000 @ move sign bit in r5
bic xh, xh, #0x80000000
@@ -508,9 +508,9 @@ ARM_FUNC_ALIAS aeabi_ul2d floatundidf
@ compatibility.
adr ip, LSYM(f0_ret)
@ Push pc as well so that RETLDM works correctly.
- do_push {r4, r5, ip, lr, pc}
+ do_push (r4, r5, ip, lr, pc)
#else
- do_push {r4, r5, lr}
+ do_push (r4, r5, lr)
#endif
mov r5, #0
@@ -534,9 +534,9 @@ ARM_FUNC_ALIAS aeabi_l2d floatdidf
@ compatibility.
adr ip, LSYM(f0_ret)
@ Push pc as well so that RETLDM works correctly.
- do_push {r4, r5, ip, lr, pc}
+ do_push (r4, r5, ip, lr, pc)
#else
- do_push {r4, r5, lr}
+ do_push (r4, r5, lr)
#endif
ands r5, ah, #0x80000000 @ sign bit in r5
@@ -585,7 +585,7 @@ ARM_FUNC_ALIAS aeabi_l2d floatdidf
@ Legacy code expects the result to be returned in f0. Copy it
@ there as well.
LSYM(f0_ret):
- do_push {r0, r1}
+ do_push (r0, r1)
ldfd f0, [sp], #8
RETLDM
@@ -602,7 +602,7 @@ LSYM(f0_ret):
ARM_FUNC_START muldf3
ARM_FUNC_ALIAS aeabi_dmul muldf3
- do_push {r4, r5, r6, lr}
+ do_push (r4, r5, r6, lr)
@ Mask out exponents, trap any zero/denormal/INF/NAN.
mov ip, #0xff
@@ -910,7 +910,7 @@ LSYM(Lml_n):
ARM_FUNC_START divdf3
ARM_FUNC_ALIAS aeabi_ddiv divdf3
- do_push {r4, r5, r6, lr}
+ do_push (r4, r5, r6, lr)
@ Mask out exponents, trap any zero/denormal/INF/NAN.
mov ip, #0xff
@@ -1195,7 +1195,7 @@ ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmp
@ The status-returning routines are required to preserve all
@ registers except ip, lr, and cpsr.
-6: do_push {r0, lr}
+6: do_push (r0, lr)
ARM_CALL cmpdf2
@ Set the Z flag correctly, and the C flag unconditionally.
cmp r0, #0
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c (revision 154712)
+++ gcc/config/arm/arm.c (working copy)
@@ -1882,6 +1882,13 @@ arm_override_options (void)
/* Register global variables with the garbage collector. */
arm_add_gc_roots ();
+
+ if (low_irq_latency && TARGET_THUMB)
+ {
+ warning (0,
+ "-mlow-irq-latency has no effect when compiling for the Thumb");
+ low_irq_latency = 0;
+ }
}
static void
@@ -8947,6 +8954,9 @@ load_multiple_sequence (rtx *operands, i
int base_reg = -1;
int i;
+ if (low_irq_latency)
+ return 0;
+
/* Can only handle 2, 3, or 4 insns at present,
though could be easily extended if required. */
gcc_assert (nops >= 2 && nops <= 4);
@@ -9176,6 +9186,9 @@ store_multiple_sequence (rtx *operands,
int base_reg = -1;
int i;
+ if (low_irq_latency)
+ return 0;
+
/* Can only handle 2, 3, or 4 insns at present, though could be easily
extended if required. */
gcc_assert (nops >= 2 && nops <= 4);
@@ -9383,7 +9396,7 @@ arm_gen_load_multiple (int base_regno, i
As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
for counts of 3 or 4 regs. */
- if (arm_tune_xscale && count <= 2 && ! optimize_size)
+ if (low_irq_latency || (arm_tune_xscale && count <= 2 && ! optimize_size))
{
rtx seq;
@@ -9446,7 +9459,7 @@ arm_gen_store_multiple (int base_regno,
/* See arm_gen_load_multiple for discussion of
the pros/cons of ldm/stm usage for XScale. */
- if (arm_tune_xscale && count <= 2 && ! optimize_size)
+ if (low_irq_latency || (arm_tune_xscale && count <= 2 && ! optimize_size))
{
rtx seq;
@@ -11583,6 +11596,21 @@ static void
vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
{
int i;
+ int offset;
+
+ if (low_irq_latency)
+ {
+ /* Output a sequence of FLDD instructions. */
+ offset = 0;
+ for (i = reg; i < reg + count; ++i, offset += 8)
+ {
+ fputc ('\t', stream);
+ asm_fprintf (stream, "fldd\td%d, [%r,#%d]\n", i, base, offset);
+ }
+ asm_fprintf (stream, "\tadd\tsp, sp, #%d\n", count * 8);
+ return;
+ }
+
/* Workaround ARM10 VFPr1 bug. */
if (count == 2 && !arm_arch6)
@@ -11653,6 +11681,56 @@ vfp_emit_fstmd (int base_reg, int count)
rtx tmp, reg;
int i;
+ if (low_irq_latency)
+ {
+ int saved_size;
+ rtx sp_insn;
+
+ if (!count)
+ return 0;
+
+ saved_size = count * GET_MODE_SIZE (DFmode);
+
+ /* Since fstd does not have a post-decrement addressing mode,
+ we first decrement the stack pointer and then use base+offset
+ stores for the VFP registers. The ARM EABI unwind information
+ can't easily describe base+offset loads, so we attach
+ a note for the effects of the whole block in the first insn,
+ and avoid marking the subsequent instructions
+ with RTX_FRAME_RELATED_P. */
+ sp_insn = gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (-saved_size));
+ sp_insn = emit_insn (sp_insn);
+ RTX_FRAME_RELATED_P (sp_insn) = 1;
+
+ dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
+ XVECEXP (dwarf, 0, 0) =
+ gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, -saved_size));
+
+ /* Push the double-precision VFP registers to the stack.  */
+ for (i = 0; i < count; ++i)
+ {
+ rtx reg;
+ rtx mem;
+ rtx addr;
+ rtx insn;
+ reg = gen_rtx_REG (DFmode, base_reg + 2*i);
+ addr = (i == 0) ? stack_pointer_rtx
+ : gen_rtx_PLUS (SImode, stack_pointer_rtx,
+ GEN_INT (i * GET_MODE_SIZE (DFmode)));
+ mem = gen_frame_mem (DFmode, addr);
+ insn = emit_move_insn (mem, reg);
+ XVECEXP (dwarf, 0, i+1) =
+ gen_rtx_SET (VOIDmode, mem, reg);
+ }
+
+ REG_NOTES (sp_insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
+ REG_NOTES (sp_insn));
+
+ return saved_size;
+ }
+
/* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
register pairs are stored by a store multiple insn. We avoid this
by pushing an extra pair. */
@@ -13092,7 +13170,7 @@ arm_get_vfp_saved_size (void)
if (count > 0)
{
/* Workaround ARM10 VFPr1 bug. */
- if (count == 2 && !arm_arch6)
+ if (count == 2 && !arm_arch6 && !low_irq_latency)
count++;
saved += count * 8;
}
@@ -13430,6 +13508,41 @@ arm_output_function_prologue (FILE *f, H
}
+/* Generate to STREAM a code sequence that pops registers identified
+ in REGS_MASK from SP.  SP is incremented as a result.  If RFE is
+ nonzero, restore CPSR from SPSR when popping PC.  */
+static void
+print_pop_reg_by_ldr (FILE *stream, int regs_mask, int rfe)
+{
+ int reg;
+
+ gcc_assert (! (regs_mask & (1 << SP_REGNUM)));
+
+ for (reg = 0; reg < PC_REGNUM; ++reg)
+ if (regs_mask & (1 << reg))
+ asm_fprintf (stream, "\tldr\t%r, [%r], #4\n",
+ reg, SP_REGNUM);
+
+ if (regs_mask & (1 << PC_REGNUM))
+ {
+ if (rfe)
+ /* When returning from an exception, we need to
+ copy SPSR to CPSR. There are two ways to do
+ that: the ldm instruction with the "^" suffix,
+ and the movs instruction. The latter would
+ require loading from the stack into a scratch
+ register and then moving it to PC, so we would
+ need an extra instruction and would have to
+ make sure we actually have a spare register.
+ Using ldm with a single register is
+ simpler. */
+ asm_fprintf (stream, "\tldm\tsp!, {pc}^\n");
+ else
+ asm_fprintf (stream, "\tldr\t%r, [%r], #4\n",
+ PC_REGNUM, SP_REGNUM);
+ }
+}
+
const char *
arm_output_epilogue (rtx sibling)
{
@@ -13798,22 +13911,21 @@ arm_output_epilogue (rtx sibling)
to load use the LDR instruction - it is faster. For Thumb-2
always use pop and the assembler will pick the best instruction.*/
if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
- && !IS_INTERRUPT(func_type))
+ && !IS_INTERRUPT (func_type))
{
asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
}
else if (saved_regs_mask)
{
- if (saved_regs_mask & (1 << SP_REGNUM))
- /* Note - write back to the stack register is not enabled
- (i.e. "ldmfd sp!..."). We know that the stack pointer is
- in the list of registers and if we add writeback the
- instruction becomes UNPREDICTABLE. */
- print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
- rfe);
- else if (TARGET_ARM)
- print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
- rfe);
+ gcc_assert (! (saved_regs_mask & (1 << SP_REGNUM)));
+ if (TARGET_ARM)
+ {
+ if (low_irq_latency)
+ print_pop_reg_by_ldr (f, saved_regs_mask, rfe);
+ else
+ print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
+ rfe);
+ }
else
print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
}
@@ -13934,6 +14044,31 @@ emit_multi_reg_push (unsigned long mask)
gcc_assert (num_regs && num_regs <= 16);
+ if (low_irq_latency)
+ {
+ rtx insn = 0;
+
+ /* Emit a series of str instructions rather than a single stm. */
+ /* TODO: Use strd where possible. */
+ gcc_assert (! (mask & (1 << SP_REGNUM)));
+
+ for (i = LAST_ARM_REGNUM; i >= 0; --i)
+ {
+ if (mask & (1 << i))
+ {
+ rtx reg, where, mem;
+
+ reg = gen_rtx_REG (SImode, i);
+ where = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
+ mem = gen_rtx_MEM (SImode, where);
+ insn = emit_move_insn (mem, reg);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ }
+
+ return insn;
+ }
+
/* We don't record the PC in the dwarf frame information. */
num_dwarf_regs = num_regs;
if (mask & (1 << PC_REGNUM))
Index: gcc/config/arm/lib1funcs.asm
===================================================================
--- gcc/config/arm/lib1funcs.asm (revision 154712)
+++ gcc/config/arm/lib1funcs.asm (working copy)
@@ -242,8 +242,8 @@ LSYM(Lend_fde):
.macro shift1 op, arg0, arg1, arg2
\op \arg0, \arg1, \arg2
.endm
-#define do_push push
-#define do_pop pop
+#define do_push(...) push {__VA_ARGS__}
+#define do_pop(...) pop {__VA_ARGS__}
#define COND(op1, op2, cond) op1 ## op2 ## cond
/* Perform an arithmetic operation with a variable shift operand. This
requires two instructions and a scratch register on Thumb-2. */
@@ -257,8 +257,42 @@ LSYM(Lend_fde):
.macro shift1 op, arg0, arg1, arg2
mov \arg0, \arg1, \op \arg2
.endm
-#define do_push stmfd sp!,
-#define do_pop ldmfd sp!,
+#if defined(__low_irq_latency__)
+#define do_push(...) \
+ _buildN1(do_push, _buildC1(__VA_ARGS__))( __VA_ARGS__)
+#define _buildN1(BASE, X) _buildN2(BASE, X)
+#define _buildN2(BASE, X) BASE##X
+#define _buildC1(...) _buildC2(__VA_ARGS__,9,8,7,6,5,4,3,2,1)
+#define _buildC2(a1,a2,a3,a4,a5,a6,a7,a8,a9,c,...) c
+
+#define do_push1(r1) str r1, [sp, #-4]!
+#define do_push2(r1, r2) str r2, [sp, #-4]! ; str r1, [sp, #-4]!
+#define do_push3(r1, r2, r3) str r3, [sp, #-4]! ; str r2, [sp, #-4]!; str r1, [sp, #-4]!
+#define do_push4(r1, r2, r3, r4) \
+ do_push3 (r2, r3, r4);\
+ do_push1 (r1)
+#define do_push5(r1, r2, r3, r4, r5) \
+ do_push4 (r2, r3, r4, r5);\
+ do_push1 (r1)
+
+#define do_pop(...) \
+_buildN1(do_pop, _buildC1(__VA_ARGS__))( __VA_ARGS__)
+
+#define do_pop1(r1) ldr r1, [sp], #4
+#define do_pop2(r1, r2) ldr r1, [sp], #4 ; ldr r2, [sp], #4
+#define do_pop3(r1, r2, r3) ldr r1, [sp], #4 ; ldr r2, [sp], #4; ldr r3, [sp], #4
+#define do_pop4(r1, r2, r3, r4) \
+ do_pop1 (r1);\
+ do_pop3 (r2, r3, r4)
+#define do_pop5(r1, r2, r3, r4, r5) \
+ do_pop1 (r1);\
+ do_pop4 (r2, r3, r4, r5)
+#else
+#define do_push(...) stmfd sp!, { __VA_ARGS__}
+#define do_pop(...) ldmfd sp!, {__VA_ARGS__}
+#endif
+
+
#define COND(op1, op2, cond) op1 ## cond ## op2
.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
\name \dest, \src1, \src2, \shiftop \shiftreg
@@ -1142,7 +1176,7 @@ LSYM(Lover12):
ARM_FUNC_START div0
- do_push {r1, lr}
+ do_push (r1, lr)
mov r0, #SIGFPE
bl SYM(raise) __PLT__
RETLDM r1
@@ -1367,7 +1401,7 @@ FUNC_START clzdi2
push {r4, lr}
# else
ARM_FUNC_START clzdi2
- do_push {r4, lr}
+ do_push (r4, lr)
# endif
cmp xxh, #0
bne 1f
Index: gcc/config/arm/arm.h
===================================================================
--- gcc/config/arm/arm.h (revision 154712)
+++ gcc/config/arm/arm.h (working copy)
@@ -95,6 +95,8 @@ extern char arm_arch_name[];
builtin_define ("__IWMMXT__"); \
if (TARGET_AAPCS_BASED) \
builtin_define ("__ARM_EABI__"); \
+ if (low_irq_latency) \
+ builtin_define ("__low_irq_latency__"); \
} while (0)
/* The various ARM cores. */
@@ -443,6 +445,10 @@ extern int arm_arch_thumb2;
/* Nonzero if chip supports integer division instruction. */
extern int arm_arch_hwdiv;
+/* Nonzero if we should minimize interrupt latency of the
+ generated code. */
+extern int low_irq_latency;
+
#ifndef TARGET_DEFAULT
#define TARGET_DEFAULT (MASK_APCS_FRAME)
#endif
Index: gcc/config/arm/ieee754-sf.S
===================================================================
--- gcc/config/arm/ieee754-sf.S (revision 154712)
+++ gcc/config/arm/ieee754-sf.S (working copy)
@@ -481,7 +481,7 @@ LSYM(Lml_x):
and r3, ip, #0x80000000
@ Well, no way to make it shorter without the umull instruction.
- do_push {r3, r4, r5}
+ do_push (r3, r4, r5)
mov r4, r0, lsr #16
mov r5, r1, lsr #16
bic r0, r0, r4, lsl #16
@@ -492,7 +492,7 @@ LSYM(Lml_x):
mla r0, r4, r1, r0
adds r3, r3, r0, lsl #16
adc r1, ip, r0, lsr #16
- do_pop {r0, r4, r5}
+ do_pop (r0, r4, r5)
#else
@@ -882,7 +882,7 @@ ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmp
@ The status-returning routines are required to preserve all
@ registers except ip, lr, and cpsr.
-6: do_push {r0, r1, r2, r3, lr}
+6: do_push (r0, r1, r2, r3, lr)
ARM_CALL cmpsf2
@ Set the Z flag correctly, and the C flag unconditionally.
cmp r0, #0
Index: gcc/config/arm/bpabi.S
===================================================================
--- gcc/config/arm/bpabi.S (revision 154712)
+++ gcc/config/arm/bpabi.S (working copy)
@@ -68,16 +68,17 @@ ARM_FUNC_START aeabi_ulcmp
ARM_FUNC_START aeabi_ldivmod
sub sp, sp, #8
-#if defined(__thumb2__)
+/* Low latency and Thumb-2 do_push implementations can't push sp directly. */
+#if defined(__thumb2__) || defined(__low_irq_latency__)
mov ip, sp
- push {ip, lr}
+ do_push (ip, lr)
#else
- do_push {sp, lr}
+ stmfd sp!, {sp, lr}
#endif
bl SYM(__gnu_ldivmod_helper) __PLT__
ldr lr, [sp, #4]
add sp, sp, #8
- do_pop {r2, r3}
+ do_pop (r2, r3)
RET
#endif /* L_aeabi_ldivmod */
@@ -86,16 +87,17 @@ ARM_FUNC_START aeabi_ldivmod
ARM_FUNC_START aeabi_uldivmod
sub sp, sp, #8
-#if defined(__thumb2__)
+/* Low latency and Thumb-2 do_push implementations can't push sp directly. */
+#if defined(__thumb2__) || defined(__low_irq_latency__)
mov ip, sp
- push {ip, lr}
+ do_push (ip, lr)
#else
- do_push {sp, lr}
+ stmfd sp!, {sp, lr}
#endif
bl SYM(__gnu_uldivmod_helper) __PLT__
ldr lr, [sp, #4]
add sp, sp, #8
- do_pop {r2, r3}
+ do_pop (r2, r3)
RET
#endif /* L_aeabi_divmod */
Index: gcc/config/arm/arm.opt
===================================================================
--- gcc/config/arm/arm.opt (revision 154712)
+++ gcc/config/arm/arm.opt (working copy)
@@ -161,6 +161,10 @@ mvectorize-with-neon-quad
Target Report Mask(NEON_VECTORIZE_QUAD)
Use Neon quad-word (rather than double-word) registers for vectorization
+mlow-irq-latency
+Target Report Var(low_irq_latency)
+Try to reduce interrupt latency of the generated code
+
mword-relocations
Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS)
Only generate absolute relocations on word sized values.
Index: gcc/config/arm/predicates.md
===================================================================
--- gcc/config/arm/predicates.md (revision 154712)
+++ gcc/config/arm/predicates.md (working copy)
@@ -303,6 +303,9 @@
HOST_WIDE_INT i = 1, base = 0;
rtx elt;
+ if (low_irq_latency)
+ return false;
+
if (count <= 1
|| GET_CODE (XVECEXP (op, 0, 0)) != SET)
return false;
@@ -360,6 +363,9 @@
HOST_WIDE_INT i = 1, base = 0;
rtx elt;
+ if (low_irq_latency)
+ return false;
+
if (count <= 1
|| GET_CODE (XVECEXP (op, 0, 0)) != SET)
return false;
Index: gcc/config/arm/arm.md
===================================================================
--- gcc/config/arm/arm.md (revision 154712)
+++ gcc/config/arm/arm.md (working copy)
@@ -6554,7 +6554,7 @@
(match_operand:BLK 1 "general_operand" "")
(match_operand:SI 2 "const_int_operand" "")
(match_operand:SI 3 "const_int_operand" "")]
- "TARGET_EITHER"
+ "TARGET_EITHER && !low_irq_latency"
"
if (TARGET_32BIT)
{