This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH]: ARM ISR stack frame fix


Hi everyone,

This is a patch to fix some really annoying stack bugs that prevented me from
writing using the __attribute__((interrupt("IRQ"))) extension of arm-gcc in any
useful way.

I'm submitting a diff against head, but I did all my testing on a
patch against a snapshot back in November, and no longer have access
to ARM hardware with which to test (for my development, I borrowed an
ARM7-based device for a class project which has since concluded).
I also only tested on 'irq' handlers, never on any other type of handler.

Not included in this patch is a hack I used to make dwarf output work
correctly for ISRs. I commented out the RTX_FRAME_RELATED_P line in

      if (IS_INTERRUPT (func_type))
	{
	  /* Interrupt functions must not corrupt any registers.
	     Creating a frame pointer however, corrupts the IP
	     register, so we must push it first.  */
	  insn = emit_multi_reg_push (1 << IP_REGNUM);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

to make files compile with '-g'. Unfortunately, I don't understand the
dwarf output sufficiently well to know what the right solution is.

-Alan Shieh

Problem

	In the prologue of an ISR with a frame pointer, ip is saved with
		str     ip, [sp, #-4]!
	the equivalent of a stmfd. However, ip is restored with
 		ldmea   sp, {ip}
	which results in both the incorrect value being restored in ip, as well
	as an improper stack adjustment (the stack leaks 1 word per invocation).
	The code should be
		ldmfd   sp, {ip}
	Note that the restoration of ip occurs in a different context than the
	restoration of all the other registers. In the latter case, the base
	pointer of the block transfer is pointing to the stack entry just above
	the data to be loaded, and in the latter case the base is pointing
	directly at the data.

	arm_compute_initial_elimination_offset did not account for the
	possibility of saving r0-r3 or ip on the stack during interrupt
	handlers. This caused reloads to be allocated to the same stack
	slots as saved registers.

Testcases

These large functions generate code that use r0-r3 and ip. Without the
patch, the stack offsets of the volatile variables in the normal function and
the ISR will be identical.


extern int bar();

int foo(int a, int b, int c, int d, int e, int f, int g, int h) {
  volatile int i=(a+b)-(g+h)+bar();
  volatile int j=(e+f)-(c+d);
  return a+b+c+d+e+f+g+h+i+j;
}

int foo1(int a, int b, int c, int d, int e, int f, int g, int h) __attribute__ ((interrupt("IRQ")));

int foo1(int a, int b, int c, int d, int e, int f, int g, int h) {
  volatile int i=(a+b)-(g+h)+bar();
  volatile int j=(e+f)-(c+d);
  return a+b+c+d+e+f+g+h+i+j;
}

*** Before the fix


	.file	"big-stack.c"
	.text
	.align	2
	.global	foo
	.type	foo,function
foo:
	@ args = 16, pretend = 0, frame = 24
	@ frame_needed = 1, current_function_anonymous_args = 0
	mov	ip, sp
	stmfd	sp!, {r4, fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #24
	str	r0, [fp, #-20]
	str	r1, [fp, #-24]
	str	r2, [fp, #-28]
	str	r3, [fp, #-32]
	ldr	r2, [fp, #-20]
	ldr	r3, [fp, #-24]
	add	r1, r2, r3
	ldr	r2, [fp, #12]
	ldr	r3, [fp, #16]
	add	r3, r2, r3
	rsb	r4, r3, r1
	bl	bar
	mov	r3, r0
	add	r3, r4, r3
	str	r3, [fp, #-36]
	ldr	r2, [fp, #4]
	ldr	r3, [fp, #8]
	add	r1, r2, r3
	ldr	r2, [fp, #-28]
	ldr	r3, [fp, #-32]
	add	r3, r2, r3
	rsb	r3, r3, r1
	str	r3, [fp, #-40]
	ldr	r2, [fp, #-20]
	ldr	r3, [fp, #-24]
	add	r3, r2, r3
	ldr	r2, [fp, #-28]
	add	r3, r3, r2
	ldr	r2, [fp, #-32]
	add	r3, r3, r2
	ldr	r2, [fp, #4]
	add	r3, r3, r2
	ldr	r2, [fp, #8]
	add	r3, r3, r2
	ldr	r2, [fp, #12]
	add	r3, r3, r2
	ldr	r2, [fp, #16]
	add	r3, r3, r2
	ldr	r2, [fp, #-36]
	add	r3, r3, r2
	ldr	r2, [fp, #-40]
	add	r3, r3, r2
	mov	r0, r3
	ldmea	fp, {r4, fp, sp, pc}
.Lfe1:
	.size	foo,.Lfe1-foo
	.align	2
	.global	foo1
	.type	foo1,function
foo1:
	@ Interrupt Service Routine.
	@ args = 16, pretend = 0, frame = 24
	@ frame_needed = 1, current_function_anonymous_args = 0
	str	ip, [sp, #-4]!
	mov	ip, sp
	stmfd	sp!, {r0, r1, r2, r3, r4, fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #24
	str	r0, [fp, #-20]
	str	r1, [fp, #-24]
	str	r2, [fp, #-28]
	str	r3, [fp, #-32]
	ldr	r2, [fp, #-20]
	ldr	r3, [fp, #-24]
	add	r1, r2, r3
	ldr	r2, [fp, #12]
	ldr	r3, [fp, #16]
	add	r3, r2, r3
	rsb	r4, r3, r1
	bl	bar
	mov	r3, r0
	add	r3, r4, r3
	str	r3, [fp, #-36]
	ldr	r2, [fp, #4]
	ldr	r3, [fp, #8]
	add	r1, r2, r3
	ldr	r2, [fp, #-28]
	ldr	r3, [fp, #-32]
	add	r3, r2, r3
	rsb	r3, r3, r1
	str	r3, [fp, #-40]
	ldr	r2, [fp, #-20]
	ldr	r3, [fp, #-24]
	add	r3, r2, r3
	ldr	r2, [fp, #-28]
	add	r3, r3, r2
	ldr	r2, [fp, #-32]
	add	r3, r3, r2
	ldr	r2, [fp, #4]
	add	r3, r3, r2
	ldr	r2, [fp, #8]
	add	r3, r3, r2
	ldr	r2, [fp, #12]
	add	r3, r3, r2
	ldr	r2, [fp, #16]
	add	r3, r3, r2
	ldr	r2, [fp, #-36]
	add	r3, r3, r2
	ldr	r2, [fp, #-40]
	add	r3, r3, r2
	mov	r0, r3
	ldmea	fp, {r0, r1, r2, r3, r4, fp, sp, lr}
	ldmea	sp, {ip}
	subs	pc, lr, #4
.Lfe2:
	.size	foo1,.Lfe2-foo1
	.ident	"GCC: (GNU) 3.1 20011220 (experimental)"

Note that the offsets for the reloads and locals are not adjusted properly in
the ISR (there are 5 more registers pushed onto the stack in the ISR, yet the
offsets are identical)

***** After the fix


	.file	"big-stack.c"
	.text
	.align	2
	.global	foo
	.type	foo,function
foo:
	@ args = 16, pretend = 0, frame = 24
	@ frame_needed = 1, current_function_anonymous_args = 0
	mov	ip, sp
	stmfd	sp!, {r4, fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #24
	str	r0, [fp, #-20]
	str	r1, [fp, #-24]
	str	r2, [fp, #-28]
	str	r3, [fp, #-32]
	ldr	r2, [fp, #-20]
	ldr	r3, [fp, #-24]
	add	r1, r2, r3
	ldr	r2, [fp, #12]
	ldr	r3, [fp, #16]
	add	r3, r2, r3
	rsb	r4, r3, r1
	bl	bar
	mov	r3, r0
	add	r3, r4, r3
	str	r3, [fp, #-36]
	ldr	r2, [fp, #4]
	ldr	r3, [fp, #8]
	add	r1, r2, r3
	ldr	r2, [fp, #-28]
	ldr	r3, [fp, #-32]
	add	r3, r2, r3
	rsb	r3, r3, r1
	str	r3, [fp, #-40]
	ldr	r2, [fp, #-20]
	ldr	r3, [fp, #-24]
	add	r3, r2, r3
	ldr	r2, [fp, #-28]
	add	r3, r3, r2
	ldr	r2, [fp, #-32]
	add	r3, r3, r2
	ldr	r2, [fp, #4]
	add	r3, r3, r2
	ldr	r2, [fp, #8]
	add	r3, r3, r2
	ldr	r2, [fp, #12]
	add	r3, r3, r2
	ldr	r2, [fp, #16]
	add	r3, r3, r2
	ldr	r2, [fp, #-36]
	add	r3, r3, r2
	ldr	r2, [fp, #-40]
	add	r3, r3, r2
	mov	r0, r3
	ldmea	fp, {r4, fp, sp, pc}
.Lfe1:
	.size	foo,.Lfe1-foo
	.align	2
	.global	foo1
	.type	foo1,function
foo1:
	@ Interrupt Service Routine.
	@ args = 16, pretend = 0, frame = 24
	@ frame_needed = 1, current_function_anonymous_args = 0
	str	ip, [sp, #-4]!
	mov	ip, sp
	stmfd	sp!, {r0, r1, r2, r3, r4, fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #24
	str	r0, [fp, #-44]
	str	r1, [fp, #-48]
	str	r2, [fp, #-52]
	str	r3, [fp, #-56]
	ldr	r2, [fp, #-44]
	ldr	r3, [fp, #-48]
	add	r1, r2, r3
	ldr	r2, [fp, #12]
	ldr	r3, [fp, #16]
	add	r3, r2, r3
	rsb	r4, r3, r1
	bl	bar
	mov	r3, r0
	add	r3, r4, r3
	str	r3, [fp, #-60]
	ldr	r2, [fp, #4]
	ldr	r3, [fp, #8]
	add	r1, r2, r3
	ldr	r2, [fp, #-52]
	ldr	r3, [fp, #-56]
	add	r3, r2, r3
	rsb	r3, r3, r1
	str	r3, [fp, #-64]
	ldr	r2, [fp, #-44]
	ldr	r3, [fp, #-48]
	add	r3, r2, r3
	ldr	r2, [fp, #-52]
	add	r3, r3, r2
	ldr	r2, [fp, #-56]
	add	r3, r3, r2
	ldr	r2, [fp, #4]
	add	r3, r3, r2
	ldr	r2, [fp, #8]
	add	r3, r3, r2
	ldr	r2, [fp, #12]
	add	r3, r3, r2
	ldr	r2, [fp, #16]
	add	r3, r3, r2
	ldr	r2, [fp, #-60]
	add	r3, r3, r2
	ldr	r2, [fp, #-64]
	add	r3, r3, r2
	mov	r0, r3
	ldmea	fp, {r0, r1, r2, r3, r4, fp, sp, lr}
	ldmfd	sp, {ip}
	subs	pc, lr, #4
.Lfe2:
	.size	foo1,.Lfe2-foo1
	.ident	"GCC: (GNU) 3.1 20011220 (experimental)"


ChangeLog

2001-12-20 Alan Shieh <ashieh@hkn.eecs.berkeley.edu>

	* config/arm/arm.c (arm_output_epilogue): Changed IP restore
	  to use ldmfd instead of ldmea.
	* config/arm/arm.c (arm_compute_initial_elimination_offset):
	  Modified to reflect behavior of arm_expand_prologue when generating
	  interrupt handlers

Bootstrap

	Builds on cygwin with gcc 2.95.3-5

Patch

Index: gcc/config/arm/arm.c
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/config/arm/arm.c,v
retrieving revision 1.180
diff -c -3 -p -r1.180 arm.c
*** gcc/config/arm/arm.c	17 Dec 2001 15:05:27 -0000	1.180
--- gcc/config/arm/arm.c	20 Dec 2001 13:01:23 -0000
*************** arm_output_epilogue (really_return)
*** 7535,7541 ****
        if (IS_INTERRUPT (func_type))
  	/* Interrupt handlers will have pushed the
  	   IP onto the stack, so restore it now.  */
! 	print_multi_reg (f, "ldmea\t%r", SP_REGNUM, 1 << IP_REGNUM);
      }
    else
      {
--- 7535,7541 ----
        if (IS_INTERRUPT (func_type))
  	/* Interrupt handlers will have pushed the
  	   IP onto the stack, so restore it now.  */
! 	print_multi_reg (f, "ldmfd\t%r", SP_REGNUM, 1 << IP_REGNUM);
      }
    else
      {
*************** arm_compute_initial_elimination_offset (
*** 7948,7953 ****
--- 7948,7979 ----
      {
        unsigned int reg;

+       if (IS_INTERRUPT (func_type) )
+     {
+       unsigned int max_reg;
+       /* This is based on code from arm_compute_save_reg_mask */
+
+       /* Interrupt functions must not corrupt any registers,
+ 	 even call clobbered ones.  If this is a leaf function
+ 	 we can just examine the registers used by the RTL, but
+ 	 otherwise we have to assume that whatever function is
+ 	 called might clobber anything, and so we have to save
+ 	 all the call-clobbered registers as well.  */
+       if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
+ 	/* FIQ handlers have registers r8 - r12 banked, so
+ 	   we only need to check r0 - r7, Normal ISRs only
+ 	   bank r14 and r15, so ew must check up to r12.
+ 	   r13 is the stack pointer which is always preserved,
+ 	   so we do not need to consider it here.  */
+ 	max_reg = 7;
+       else
+ 	max_reg = 12;
+
+       for (reg = 0; reg <= max_reg; reg++)
+ 	if (regs_ever_live[reg]
+ 	    || (! current_function_is_leaf && call_used_regs [reg]))
+ 	  call_saved_registers += 4;
+     } else {
        /* In theory we should check all of the hard registers to
  	 see if they will be saved onto the stack.  In practice
  	 registers 11 upwards have special meanings and need to
*************** arm_compute_initial_elimination_offset (
*** 7955,7961 ****
        for (reg = 0; reg <= 10; reg ++)
  	if (regs_ever_live[reg] && ! call_used_regs[reg])
  	  call_saved_registers += 4;
!
        /* Determine if register 11 will be clobbered.  */
        if (! TARGET_APCS_FRAME
  	  && ! frame_pointer_needed
--- 7981,7987 ----
        for (reg = 0; reg <= 10; reg ++)
  	if (regs_ever_live[reg] && ! call_used_regs[reg])
  	  call_saved_registers += 4;
!     }
        /* Determine if register 11 will be clobbered.  */
        if (! TARGET_APCS_FRAME
  	  && ! frame_pointer_needed

Attachment: arm-isr-stack-fix.patch
Description: Text document


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]