This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.



Linux/PPC gcc 2.95.2 - bad access generated when using inline func


GCC and system version
---
Reading specs from /usr/lib/gcc-lib/powerpc-unknown-linux-gnu/2.95.2/specs
gcc version 2.95.2 19991024 (release)
cort@medea<cort>$ uname -a
Linux medea.fsmlabs.com 2.2.17pre5 #8 SMP Sun Jun 25 00:09:10 MDT 2000 ppc unknown

GCC command line and files
---
gcc -I/sys/linux/include -I/sys/rtlinux/include -I/sys/rtlinux/include/compat  -I/sys/rtlinux/include/posix -Wall -Wstrict-prototypes -O2 -g -D__RTL__ -D__KERNEL__ -DMODULE -D__powerpc__ -fsigned-char -msoft-float -pipe -fno-builtin -ffixed-r2 -Wno-uninitialized -mmultiple -mstring -DMODVERSIONS -include /sys/linux/include/linux/modversions.h  -D__powerpc__  -c rtl_core.c -o rtl_core.o

Preprocessed output is attached (rtl_core.i.bz2)

I'm also attaching bitops.h and rtl_core.c.

The problem
---
Calling change_bit() when it is an inline function causes a load from a bad
address whenever change_bit() is called from a leaf function.  If the same
function also makes a call to some other function (even an empty one), no bad
access happens.

I switched the Linux/PPC bitops (file attached) to be inline functions rather
than actual function calls.  Now RTLinux dies when I run it.

The problem shows up in rtl_soft_cli() when it does an L_CLEAR(), which is a
define that does a change_bit() (now inline).  Since rtl_soft_cli() is now a
leaf function, gcc produces asm that tries a load from a bad address.  If I
make rtl_soft_cli() a non-leaf function (by adding a call to an empty
function) the code runs without trouble.  The only difference in the
generated asm is the prologue/epilogue save/restore of some general-purpose
registers.

I've checked the constraints in change_bit() and think they're correct.  I
included bitops.h in case they're not.

I can provide the whole source tree for RTL (it's small) and a recipe to
reproduce the problem and the workaround if you need them.
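
A minimal sketch of the pattern, with made-up names (illustrative only --
the real code is in the attached files):

	#include "bitops.h"		/* the attached inline version */

	static unsigned long flags;
	static void empty(void) { }

	void bad_leaf(void)		/* leaf: generated asm loads from a bad address */
	{
		change_bit(1, &flags);
	}

	void good_nonleaf(void)		/* non-leaf: the same bitop runs fine */
	{
		change_bit(1, &flags);
		empty();		/* any call is enough to change the prologue/epilogue */
	}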

rtl_core.i.bz2

/*
 * $Id: bitops.h,v 1.12 2000/02/09 03:28:31 davem Exp $
 * bitops.h: Bit string operations on the ppc
 */

#ifndef _PPC_BITOPS_H
#define _PPC_BITOPS_H

#include <linux/config.h>
#include <asm/byteorder.h>

extern void set_bit(int nr, volatile void *addr);
extern void clear_bit(int nr, volatile void *addr);
extern void change_bit(int nr, volatile void *addr);
extern int test_and_set_bit(int nr, volatile void *addr);
extern int test_and_clear_bit(int nr, volatile void *addr);
extern int test_and_change_bit(int nr, volatile void *addr);

/*
 * Arguably these bit operations don't imply any memory barrier or
 * SMP ordering, but in fact a lot of drivers expect them to imply
 * both, since they do on x86 cpus.
 */
#ifdef CONFIG_SMP
#define SMP_WMB		"eieio\n"
#define SMP_MB		"\nsync"
#else
#define SMP_WMB
#define SMP_MB
#endif /* CONFIG_SMP */

#define __INLINE_BITOPS	1

#if __INLINE_BITOPS
/*
 * These used to be if'd out here because using : "cc" as a constraint
 * resulted in errors from egcs.  Things may be OK with gcc-2.95.
 */
extern __inline__ void set_bit(int nr, volatile void * addr)
{
	unsigned long old;
	unsigned long mask = 1 << (nr & 0x1f);
	unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
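	/* Atomic read-modify-write: lwarx loads the word and sets a
	   reservation; stwcx. stores only if the reservation still holds,
	   and the bne retries the sequence if it was lost. */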
	
	__asm__ __volatile__(SMP_WMB "\
1:	lwarx	%0,0,%3
	or	%0,%0,%2
	stwcx.	%0,0,%3
	bne	1b"
	SMP_MB
	: "=&r" (old), "=m" (*p)
	: "r" (mask), "r" (p), "m" (*p)
	: "cc" );
}

extern __inline__ void clear_bit(int nr, volatile void *addr)
{
	unsigned long old;
	unsigned long mask = 1 << (nr & 0x1f);
	unsigned long *p = ((unsigned long *)addr) + (nr >> 5);

	__asm__ __volatile__(SMP_WMB "\
1:	lwarx	%0,0,%3
	andc	%0,%0,%2
	stwcx.	%0,0,%3
	bne	1b"
	SMP_MB
	: "=&r" (old), "=m" (*p)
	: "r" (mask), "r" (p), "m" (*p)
	: "cc", "memory");
}

extern __inline__ void change_bit(int nr, volatile void *addr)
{
	unsigned long old;
	unsigned long mask = 1 << (nr & 0x1f);
	unsigned long *p = ((unsigned long *)addr) + (nr >> 5);
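	/* change_bit() is the routine the report above is about.  Note
	   that, unlike clear_bit(), the clobber list here names only
	   "cc", not "memory". */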

	__asm__ __volatile__(SMP_WMB "\
1:	lwarx	%0,0,%3
	xor	%0,%0,%2
	stwcx.	%0,0,%3
	bne	1b"
	SMP_MB
	: "=&r" (old), "=m" (*p)
	: "r" (mask), "r" (p), "m" (*p)
	: "cc");
}

extern __inline__ int test_and_set_bit(int nr, volatile void *addr)
{
	unsigned int old, t;
	unsigned int mask = 1 << (nr & 0x1f);
	volatile unsigned int *p = ((volatile unsigned int *)addr) + (nr >> 5);

	__asm__ __volatile__(SMP_WMB "\
1:	lwarx	%0,0,%4
	or	%1,%0,%3
	stwcx.	%1,0,%4
	bne	1b"
	SMP_MB
	: "=&r" (old), "=&r" (t), "=m" (*p)
	: "r" (mask), "r" (p), "m" (*p)
	: "cc");

	return (old & mask) != 0;
}

extern __inline__ int test_and_clear_bit(int nr, volatile void *addr)
{
	unsigned int old, t;
	unsigned int mask = 1 << (nr & 0x1f);
	volatile unsigned int *p = ((volatile unsigned int *)addr) + (nr >> 5);

	__asm__ __volatile__(SMP_WMB "\
1:	lwarx	%0,0,%4
	andc	%1,%0,%3
	stwcx.	%1,0,%4
	bne	1b"
	SMP_MB
	: "=&r" (old), "=&r" (t), "=m" (*p)
	: "r" (mask), "r" (p), "m" (*p)
	: "cc");

	return (old & mask) != 0;
}

extern __inline__ int test_and_change_bit(int nr, volatile void *addr)
{
	unsigned int old, t;
	unsigned int mask = 1 << (nr & 0x1f);
	volatile unsigned int *p = ((volatile unsigned int *)addr) + (nr >> 5);

	__asm__ __volatile__(SMP_WMB "\
1:	lwarx	%0,0,%4
	xor	%1,%0,%3
	stwcx.	%1,0,%4
	bne	1b"
	SMP_MB
	: "=&r" (old), "=&r" (t), "=m" (*p)
	: "r" (mask), "r" (p), "m" (*p)
	: "cc");

	return (old & mask) != 0;
}
#endif /* __INLINE_BITOPS */

extern __inline__ int test_bit(int nr, __const__ volatile void *addr)
{
	__const__ unsigned int *p = (__const__ unsigned int *) addr;

	return ((p[nr >> 5] >> (nr & 0x1f)) & 1) != 0;
}

/* Return the bit position of the most significant 1 bit in a word */
extern __inline__ int __ilog2(unsigned int x)
{
	int lz;

	asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x));
	return 31 - lz;
}

/* ffz: find first zero bit; (x & -x) isolates the lowest set bit of ~x */
extern __inline__ int ffz(unsigned int x)
{
	if ((x = ~x) == 0)
		return 32;
	return __ilog2(x & -x);
}

#ifdef __KERNEL__

/*
 * ffs: find first bit set. This is defined the same way as
 * the libc and compiler builtin ffs routines, therefore
 * differs in spirit from the above ffz (man ffs).
 */
extern __inline__ int ffs(int x)
{
	return __ilog2(x & -x) + 1;
}

/*
 * hweightN: returns the hamming weight (i.e. the number
 * of bits set) of a N-bit word
 */

#define hweight32(x) generic_hweight32(x)
#define hweight16(x) generic_hweight16(x)
#define hweight8(x) generic_hweight8(x)

#endif /* __KERNEL__ */

/*
 * This implementation of find_{first,next}_zero_bit was stolen from
 * Linus' asm-alpha/bitops.h.
 */
#define find_first_zero_bit(addr, size) \
	find_next_zero_bit((addr), (size), 0)

extern __inline__ unsigned long find_next_zero_bit(void * addr,
	unsigned long size, unsigned long offset)
{
	unsigned int * p = ((unsigned int *) addr) + (offset >> 5);
	unsigned int result = offset & ~31UL;
	unsigned int tmp;

	if (offset >= size)
		return size;
	size -= result;
	offset &= 31UL;
	if (offset) {
		tmp = *p++;
		tmp |= ~0UL >> (32-offset);
		if (size < 32)
			goto found_first;
		if (tmp != ~0U)
			goto found_middle;
		size -= 32;
		result += 32;
	}
	while (size >= 32) {
		if ((tmp = *p++) != ~0U)
			goto found_middle;
		result += 32;
		size -= 32;
	}
	if (!size)
		return result;
	tmp = *p;
found_first:
	tmp |= ~0UL << size;
found_middle:
	return result + ffz(tmp);
}


#define _EXT2_HAVE_ASM_BITOPS_

#ifdef __KERNEL__
/*
 * test_and_{set,clear}_bit guarantee atomicity without
 * disabling interrupts.
 */
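/* Note: the "^ 0x18" flips bits 3-4 of the bit number, i.e. reverses the
   byte order within each 32-bit word, mapping little-endian ext2 bitmap
   numbering onto this big-endian layout. */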
#define ext2_set_bit(nr, addr)		test_and_set_bit((nr) ^ 0x18, addr)
#define ext2_clear_bit(nr, addr)	test_and_clear_bit((nr) ^ 0x18, addr)

#else
extern __inline__ int ext2_set_bit(int nr, void * addr)
{
	int		mask;
	unsigned char	*ADDR = (unsigned char *) addr;
	int oldbit;

	ADDR += nr >> 3;
	mask = 1 << (nr & 0x07);
	oldbit = (*ADDR & mask) ? 1 : 0;
	*ADDR |= mask;
	return oldbit;
}

extern __inline__ int ext2_clear_bit(int nr, void * addr)
{
	int		mask;
	unsigned char	*ADDR = (unsigned char *) addr;
	int oldbit;

	ADDR += nr >> 3;
	mask = 1 << (nr & 0x07);
	oldbit = (*ADDR & mask) ? 1 : 0;
	*ADDR = *ADDR & ~mask;
	return oldbit;
}
#endif	/* __KERNEL__ */

extern __inline__ int ext2_test_bit(int nr, __const__ void * addr)
{
	__const__ unsigned char	*ADDR = (__const__ unsigned char *) addr;

	return (ADDR[nr >> 3] >> (nr & 7)) & 1;
}

/*
 * This implementation of ext2_find_{first,next}_zero_bit was stolen from
 * Linus' asm-alpha/bitops.h and modified for a big-endian machine.
 */

#define ext2_find_first_zero_bit(addr, size) \
        ext2_find_next_zero_bit((addr), (size), 0)

extern __inline__ unsigned long ext2_find_next_zero_bit(void *addr,
	unsigned long size, unsigned long offset)
{
	unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
	unsigned int result = offset & ~31UL;
	unsigned int tmp;

	if (offset >= size)
		return size;
	size -= result;
	offset &= 31UL;
	if (offset) {
		tmp = cpu_to_le32p(p++);
		tmp |= ~0UL >> (32-offset);
		if (size < 32)
			goto found_first;
		if (tmp != ~0U)
			goto found_middle;
		size -= 32;
		result += 32;
	}
	while (size >= 32) {
		if ((tmp = cpu_to_le32p(p++)) != ~0U)
			goto found_middle;
		result += 32;
		size -= 32;
	}
	if (!size)
		return result;
	tmp = cpu_to_le32p(p);
found_first:
	tmp |= ~0U << size;
found_middle:
	return result + ffz(tmp);
}

/* Bitmap functions for the minix filesystem.  */
#define minix_set_bit(nr,addr) ext2_set_bit(nr,addr)
#define minix_clear_bit(nr,addr) ext2_clear_bit(nr,addr)
#define minix_test_bit(nr,addr) ext2_test_bit(nr,addr)
#define minix_find_first_zero_bit(addr,size) ext2_find_first_zero_bit(addr,size)

#endif /* _PPC_BITOPS_H */
/*
 * RTL core features.
 *
 * Copyright (C) 1999-2000 FSM Labs (http://www.fsmlabs.com/)
 *  Written by Cort Dougan <cort@fsmlabs.com>,
 *  Victor Yodaiken <yodaiken@fsmlabs.com> and
 *  Michael Barabanov <baraban@fsmlabs.com>
 */

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/malloc.h>
#include <linux/timex.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/console.h>
#include <linux/irq.h>
#include <linux/config.h>

#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/irq.h>
#include <asm/segment.h>

#include <arch/constants.h>
#include <rtl_conf.h>
#include <rtl_printf.h>
#include <rtl_core.h>
#include <rtl_sync.h>
#include <rtl.h>
#include <rtl_debug.h>
#include <linux/irq.h>

void rtl_hard_disable_irq(unsigned int ix);
void rtl_debug(void);
static inline void debug_test_enabled(char *s);
unsigned long last_cli;

unsigned rtl_reserved_cpumask = 0;

void conpr(const char *s)
{
	long flags;
	static spinlock_t rtl_conpr_lock = SPIN_LOCK_UNLOCKED;
	struct console *c;
	int len = strlen(s);

	rtl_hard_savef_and_cli(flags);
	spin_lock(&rtl_conpr_lock);

	c = console_drivers;
	while(c) {
		if ((c->flags & CON_ENABLED) && c->write)
			c->write(c, s, len);
		c = c->next;
	}
	spin_unlock(&rtl_conpr_lock);
	rtl_hard_restore_flags(flags);
}

void conprn(const unsigned int hexnum)
{
	int i;
	unsigned int d;
	unsigned int n = hexnum;
    	char s[10];
	s[9] = 0;
	s[8] = ' ';
	for (i=7; i>=0; i--) {
		d = n % 16;
		if (d < 10) {
			d += '0';
		} else {
			d += 'a' - 10;
		}
		s[i] = d; 
		n = n / 16;
	}
    	conpr(s);
}

/* Assuming 255 global irqs and 31 max local vectors.
   On x86 we only have local irqs when we are SMP,
   but on PowerPC and others we may have local irqs from
   on-chip timers and other advanced technologies.
 */

/* bit positions for flags, constants, and macros for global structure */
#define IRQ_NOT_VALID -1
#define IRQ_ARRAY_SIZE ((256/8)/sizeof(ulong)) /*256 global irqs */
#define IRQ_ZINIT {0}
#if BITS_PER_LONG == 32
#define IRQ_NZINIT {~0x0,~0x0,~0x0,~0x0,~0x0,~0x0,~0x0,~0x0}
#else
#define IRQ_NZINIT {~0x0,~0x0,~0x0,~0x0}
#endif

/* fix if IRQ_ARRAY_SIZE !=4  */
#if BITS_PER_LONG == 32
#define irq_toi(x)  ((x>>5)&7)
#define irq_top(x)  ((x)& 0x1fUL)
#else
#define irq_toi(x)  ((x>>6)&15)
#define irq_top(x)  ((x)& 0x3fUL)
#endif
#define pi_toirq(p,i) ( (p) + ((i)*BITS_PER_LONG))
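/* Worked example (BITS_PER_LONG == 32): irq 37 gives irq_toi(37) == 1 and
   irq_top(37) == 5; pi_toirq(5, 1) == 5 + 1*32 == 37 recovers it. */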

#define G_PEND(f) set_bit(irq_top(f),&rtl_global.pending[irq_toi(f)])
#define G_UNPEND(f) clear_bit(irq_top(f),&rtl_global.pending[irq_toi(f)])
#define G_ISPEND(f) test_bit(irq_top(f),&rtl_global.pending[irq_toi(f)])
/* clear and set global enabled irq bits */
#define G_ENABLED(f) set_bit(irq_top(f),&rtl_global.soft_enabled[irq_toi(f)])
#define G_DISABLE(f) clear_bit(irq_top(f),&rtl_global.soft_enabled[irq_toi(f)])
#define G_ISENABLED(f) test_bit(irq_top(f),&rtl_global.soft_enabled[irq_toi(f)])
/* clear and set real time handlers (RealTimeHandlers) */
#define G_SET_RTH(f) set_bit(irq_top(f),&rtl_global.rtirq[irq_toi(f)])
#define G_CLEAR_RTH(f) clear_bit(irq_top(f),&rtl_global.rtirq[irq_toi(f)])
#define G_TEST_RTH(f) test_bit(irq_top(f),&rtl_global.rtirq[irq_toi(f)])
#define G_TEST_AND_SET_RTH(f) test_and_set_bit(irq_top(f),&rtl_global.rtirq[irq_toi(f)])
#define G_TEST_AND_CLEAR_RTH(f) test_and_clear_bit(irq_top(f),&rtl_global.rtirq[irq_toi(f)])
/* global flags */
#define g_rtl_started 0
#define g_pend_since_sti 1
#define g_initializing 2
#define g_initialized 3
#define G_SET(f) set_bit(f,&rtl_global.flags)
#define G_CLEAR(f) clear_bit(f,&rtl_global.flags)
#define G_TEST(f) test_bit(f,&rtl_global.flags)
#define G_TEST_AND_SET(f) test_and_set_bit(f,&rtl_global.flags)
#define G_TEST_AND_CLEAR(f) test_and_clear_bit(f,&rtl_global.flags)

/* Bit positions of flags for local structure and macros
   for operating on them
 */
#define l_idle 0
#define l_ienable 1
#define l_pend_since_sti 2
#define l_busy 3
#define L_SET(f) set_bit(f,&rtl_local[cpu_id].flags)
#define L_CLEAR(f) clear_bit(f,&rtl_local[cpu_id].flags)
#define L_TEST(f) test_bit(f,&rtl_local[cpu_id].flags)
#define L_TEST_AND_SET(f) test_and_set_bit(f,&rtl_local[cpu_id].flags)
#define L_PEND(f) set_bit(f,&rtl_local[cpu_id].pending)
#define L_UNPEND(f) clear_bit(f,&rtl_local[cpu_id].pending)
#define L_ISPEND(f) test_bit(f,&rtl_local[cpu_id].pending)

#define L_SET_RTH(f) set_bit(f,&rtl_local[cpu_id].rtirq)
#define L_CLEAR_RTH(f) clear_bit(f,&rtl_local[cpu_id].rtirq)
#define L_TEST_RTH(f) test_bit(f,&rtl_local[cpu_id].rtirq)
#define L_TEST_AND_SET_RTH(f) test_and_set_bit(f,&rtl_local[cpu_id].rtirq)
#define L_TEST_AND_CLEAR_RTH(f) test_and_clear_bit(f,&rtl_local[cpu_id].rtirq)

#define dispatch_rtl_handler(irq,r) rtl_global_handlers[irq].handler(irq,r)

/* TODO soft smp_processor_id doesn't work here???? -- Michael */
#define DeclareAndInit(cpu_id)  unsigned int cpu_id = rtl_getcpuid()
#define HardDeclareAndInit(cpu_id)  unsigned int cpu_id = rtl_getcpuid()

/* The basic control data structures, local and global */
struct rtl_local rtl_local[NR_CPUS];

struct rtl_global{
	spinlock_t hard_irq_controller_lock;
	unsigned long flags;
	unsigned long pending[IRQ_ARRAY_SIZE];
	unsigned long soft_enabled[IRQ_ARRAY_SIZE];
	unsigned long rtirq[IRQ_ARRAY_SIZE];
};
struct rtl_global rtl_global ={ SPIN_LOCK_UNLOCKED,0,IRQ_ZINIT,IRQ_NZINIT,IRQ_ZINIT} ;

/* rtl interrupts */
struct rtl_global_handlers{
        unsigned int (*handler)(unsigned int irq, struct pt_regs *r);
}rtl_global_handlers[IRQ_MAX_COUNT];

#ifdef __LOCAL_IRQS__
void rtl_local_pend_vec(int vector,int cpu_id)
{
	int i = VECTOR_TO_LOCAL_PND(vector);
	L_PEND(i);
	L_SET(l_pend_since_sti);
}

int rtl_local_ispending_irq(int ix)
{
 	HardDeclareAndInit(cpu_id);
	return L_ISPEND(ix);
}
#endif

extern void * rtl_code[];
#include "arch/arch.h"

static inline void debug_test_enabled(char *s)
{
	unsigned long flags;

	rtl_hard_save_flags(flags);
	if ( (ARCH_DEFINED_ENABLE && !(flags & ARCH_DEFINED_ENABLE))
	     || (!ARCH_DEFINED_ENABLE && (flags & ARCH_DEFINED_DISABLE)) )
	{
		do_first(10) {
			rtl_printf("%s: intrs hard disabled! called from %p\n",\
			   s, __builtin_return_address(0));
		}
	}
}
/* rtl_intercept intercepts global interrupts */
#define RUN_LINUX_HANDLER(irq) (G_ISPEND(irq) && !L_TEST(l_busy)\
       	&& L_TEST(l_ienable) && G_ISENABLED(irq))
intercept_t rtl_intercept(MACHDEPREGS regs)
{
	int irq;
	HardDeclareAndInit(cpu_id);
	rtl_spin_lock(&rtl_global.hard_irq_controller_lock);
	if ((irq = rtl_irq_controller_get_irq(regs)) != -1)
	{
		rtl_irq_controller_ack(irq); /* may also mask, if needed */
		
		if(G_TEST_RTH(irq)){ /* this is a RT irq */
			/* if RT wants to share it pends */
			rtl_spin_unlock(&rtl_global.hard_irq_controller_lock);
			dispatch_rtl_handler(irq,MACHDEPREGS_PTR(regs));
			rtl_spin_lock(&rtl_global.hard_irq_controller_lock);
		} else {
			G_PEND(irq);
			G_SET(g_pend_since_sti);
		}
		if(RUN_LINUX_HANDLER(irq))
		{
			/* unpend so dispatch doesn't dispatch 2 times*/
			G_UNPEND(irq); 
			L_CLEAR(l_ienable); /* disable local soft interrupts */
			G_DISABLE(irq); /* disable this irq */
			rtl_irq_controller_postirq(MACHDEPREGS_PTR(regs),irq);
			rtl_spin_unlock(&rtl_global.hard_irq_controller_lock);
			rtl_hard_sti(); /* yikes ! */
			dispatch_linux_irq(MACHDEPREGS_PTR(regs),irq);
	/* Linux enters ret_from_intr with interrupts still disabled -- Michael */
/*  			rtl_soft_sti();  */
			RETURN_FROM_INTERRUPT_LINUX; /* goes via ret_from_intr */
		}
		rtl_irq_controller_postirq(MACHDEPREGS_PTR(regs),irq);
	}
	/* get here if irq==-1 or if otherwise can't run linux handler */
	rtl_spin_unlock(&rtl_global.hard_irq_controller_lock);
	RETURN_FROM_INTERRUPT;
}

#ifdef __LOCAL_IRQS__
static inline unsigned int get_lpended_irq(void)
{
	int i;
	DeclareAndInit(cpu_id); /* only called in Linux context */
	if(rtl_local[cpu_id].pending){
		i = ffz(~rtl_local[cpu_id].pending);
		clear_bit(i,&rtl_local[cpu_id].pending);
		i = LOCAL_PND_TO_VECTOR(i);
	}
	else i = IRQ_NOT_VALID;
	return i ;
}

intercept_t rtl_local_intercept(MACHDEPREGS regs)
{
	int pnd;

	HardDeclareAndInit(cpu_id);
	/* no lock needed because we are already hard cli and only
	   use local per-cpu structures. The rtl_irq_controller
	   operations MUST vector to local only hardware or must
	   use spinlocks */
	pnd = MACHDEPREGS_TO_PND(regs);
	rtl_local_irq_controller_ack();
	if(L_TEST_RTH(pnd)){ /* this is a RT irq */
		dispatch_rtl_local_handler(pnd,MACHDEPREGS_PTR(regs));/* if RT wants to share it pends */
	}
	else{
		L_PEND(pnd);
/*		if (regs.orig_eax == LOCAL_TIMER_VECTOR) {
			if (rtl_getcpuid()) conpr("1");
			else conpr("0");
		} */
		L_SET(l_pend_since_sti);
	}
	/* VY just removed test for soft local disabled. I don't
	   think we have soft local disabled */
	if(!L_ISPEND(pnd) || L_TEST(l_busy) || !L_TEST(l_ienable) )
	{
		RETURN_FROM_LOCAL;
	}
	else
	{
		L_UNPEND(pnd); /* yes it is stupid, see above */
		L_CLEAR(l_ienable); /* disable local soft interrupts */
		rtl_hard_sti(); /* yikes ! */
		dispatch_local_linux_irq(MACHDEPREGS_PTR(regs));
	}
	RETURN_FROM_LOCAL_LINUX;
}
#endif

/* tools for soft_sti */
static inline unsigned int get_gpended_irq(void)
{
	unsigned int i, j;
	rtl_irqstate_t flags;
	unsigned long irqs;

	rtl_spin_lock_irqsave(&rtl_global.hard_irq_controller_lock, flags);
	for (i=0; i < IRQ_ARRAY_SIZE; i++) {
		irqs = rtl_global.pending[i] & rtl_global.soft_enabled[i];
		if (!irqs)
			continue;
		j = ffz(~irqs);
		clear_bit(j, &rtl_global.pending[i]);
		rtl_spin_unlock_irqrestore(&rtl_global.hard_irq_controller_lock, flags);
		return pi_toirq (j, i);
	}
	rtl_spin_unlock_irqrestore(&rtl_global.hard_irq_controller_lock, flags);
	return IRQ_NOT_VALID;
}

/* This is the leaf function in which the report above observes the bad
   load from the inlined bitop (called via L_CLEAR). */
void rtl_soft_cli(void)
{
	DeclareAndInit(cpu_id);
	if ( L_TEST(l_ienable) )
	     last_cli = (unsigned long)__builtin_return_address(0);
	L_CLEAR(l_ienable);
}

void rtl_soft_sti_no_emulation(void)
{
	DeclareAndInit(cpu_id);
	if ( !L_TEST(l_ienable) )
	     last_cli = 0;
	L_SET(l_ienable);
}

#define RTL_MAX_IRQ_DEPTH 25

void rtl_process_pending(void)
{
	int irq = 0;
	int last_irq = 0;
	int dispatch_count = 0;
	static int depth_count = 0;
	DeclareAndInit(cpu_id);
	
	L_CLEAR(l_ienable); /*disable soft interrupts !*/
       	do{
		irq = IRQ_NOT_VALID;
	       	G_CLEAR(g_pend_since_sti);
	       	L_CLEAR(l_pend_since_sti);
#ifdef __LOCAL_IRQS__
	       	while ( (irq = get_lpended_irq()) != IRQ_NOT_VALID ) {
			soft_dispatch_local(irq);
		}
#endif
#ifdef __RTL_LOCALIRQS__
	if (!test_bit(cpu_id, &rtl_reserved_cpumask))
#endif
	       	while ( (irq = get_gpended_irq()) != IRQ_NOT_VALID )
		{
			++depth_count;
			++dispatch_count;
			last_irq = irq;
			soft_dispatch_global(irq);
			--depth_count;
		}
#ifdef __RTL_LOCALIRQS__	
	}while(irq != IRQ_NOT_VALID || (!test_bit(cpu_id, &rtl_reserved_cpumask) && G_TEST(g_pend_since_sti)) || L_TEST(l_pend_since_sti));
#else
	}while(irq != IRQ_NOT_VALID || G_TEST(g_pend_since_sti) || L_TEST(l_pend_since_sti));
#endif
	
	/* process any bottom halves */
	if (  softirq_state[cpu_id].active &
	      softirq_state[cpu_id].mask )
		do_softirq();
}

void rtl_soft_sti(void)
{
	DeclareAndInit(cpu_id);
	/*debug_test_enabled("rtl_soft_sti");*/
	if ( L_TEST(l_pend_since_sti) || G_TEST(g_pend_since_sti) ||
	   (softirq_state[cpu_id].active & softirq_state[cpu_id].mask) )
		rtl_process_pending();
	rtl_soft_sti_no_emulation();
}

void rtl_soft_save_flags(unsigned long *x)
{
	DeclareAndInit(cpu_id);
	*x = (L_TEST(l_ienable)? ARCH_DEFINED_ENABLE: ARCH_DEFINED_DISABLE);
}

void rtl_soft_restore_flags(unsigned long x)
{
	if(x == ARCH_DEFINED_ENABLE)rtl_soft_sti();
	else rtl_soft_cli();
}

void rtl_soft_local_irq_save(unsigned long *x)
{
	rtl_soft_save_flags(x);
	rtl_soft_cli();
}

void rtl_soft_local_irq_restore(unsigned long x)
{
	rtl_soft_restore_flags(x);
}

void rtl_virt_disable(unsigned int irq)
{
	G_DISABLE(irq);
}

void rtl_virt_enable(unsigned int irq)
{
	G_ENABLED(irq);
	if(!G_ISPEND(irq)){
		rtl_hard_enable_irq(irq);
	}
	else{
		HardDeclareAndInit(cpu_id);
		if( L_TEST(l_ienable))
			__sti(); /* emulate the bastard */
	}
}

/* these are exported so that they can be called by rt drivers */
void rtl_global_pend_irq(int ix) { G_PEND(ix); G_SET(g_pend_since_sti); }

int rtl_global_ispending_irq(int ix) { return G_ISPEND(ix); }

void rtl_hard_enable_irq(unsigned int ix)
{
	rtl_irqstate_t flags;
	rtl_no_interrupts (flags);
	rtl_spin_lock(&rtl_global.hard_irq_controller_lock);
	rtl_irq_controller_enable(ix);
	rtl_spin_unlock(&rtl_global.hard_irq_controller_lock);
	rtl_restore_interrupts (flags);
}

void rtl_hard_disable_irq(unsigned int ix)
{
	rtl_irqstate_t flags;
	rtl_no_interrupts (flags);
	rtl_spin_lock(&rtl_global.hard_irq_controller_lock);
	rtl_irq_controller_disable(ix);
	rtl_spin_unlock(&rtl_global.hard_irq_controller_lock);
	rtl_restore_interrupts (flags);
}

/* These are used by schedulers to make sure that Linux interrupts
   do not advance and delay RT tasks.
   Both need to be called with irqs disabled. */
void rtl_make_rt_system_active(void)
{
	HardDeclareAndInit(cpu_id);
	L_SET(l_busy);
}

void rtl_make_rt_system_idle(void)
{
	HardDeclareAndInit(cpu_id);
        L_CLEAR(l_busy);

}

unsigned int rtl_rt_system_is_idle(void)
{
	HardDeclareAndInit(cpu_id);
        return !L_TEST(l_busy);

}

/* requesting and freeing rt interrupts */
/* TODO resolve the smp synchronization problem here */
int rtl_request_global_irq(unsigned int irq, 
			   unsigned int (*handler)(unsigned int, struct pt_regs *))
{

	if (!G_TEST_RTH(irq)) {
		rtl_global_handlers[irq].handler =handler;
		G_SET_RTH(irq);
		mb();
		if(rtl_global_handlers[irq].handler == handler){
			rtl_hard_enable_irq (irq);
			return 0;
		}
	}
	return -EBUSY;
}

int rtl_free_global_irq(unsigned int irq )
{
	if (!G_TEST_AND_CLEAR_RTH(irq)) {
		return -EINVAL;
	}
	return 0;
	/* don't need to clear the handler, because it will never
	   be invoked -- see rtl_intercept. If we wanted to clear the handler
	   we would have a problem with synchronization in the smp case */
}

MODULE_AUTHOR("FSMLabs <support@fsmlabs.com>");
MODULE_DESCRIPTION("Real-Time Linux Main Module");
int quiet;
MODULE_PARM(quiet, "i");

int init_module(void)
{
	int ret;
	if ( arch_takeover() ) {
		printk("arch_takeover failed\n");
		return -1;
	}
	if ( !quiet ) {
		printk("Real-Time Linux Extensions Loaded (http://www.fsmlabs.com/)\n");
	}

	ret = rtl_printf_init();
	if (ret < 0) {
		return ret;
	}
	rtl_soft_sti();
/* 	rtl_debug(); */
	return 0;
}

void cleanup_module(void)
{
	HardDeclareAndInit(cpu_id);
	rtl_printf_cleanup();

	/*
	 * Process any pending interrupts, _hard_ disable
	 * then go on.  This way, we don't get any interrupts
	 * while we're vulnerable and giving up the architecture.
	 *   -- Cort
	 *
	 *   This works for the current processor only -- Michael
	 */
	rtl_hard_cli();
	rtl_soft_sti_no_emulation();
	do {
		rtl_hard_sti();
		rtl_process_pending();
		rtl_hard_cli();
	} while ( G_TEST(g_pend_since_sti) || L_TEST(l_pend_since_sti));
	arch_giveup();
	rtl_hard_sti();
}

spinlock_t debug_lock = SPIN_LOCK_UNLOCKED;

void rtl_debug(void)
{
	int i;
	unsigned long flags, xxx_last_cli = last_cli;

	rtl_spin_lock(&debug_lock);
	rtl_hard_save_flags(flags);
	printk( "RTL: cpu %d\n", rtl_getcpuid() );
	if ( ARCH_DEFINED_ENABLE == 0 )
		printk( "RTL: hard flags %08x %s\n", (unsigned int) flags,
			(flags&ARCH_DEFINED_DISABLE) ? "disabled" : "enabled" );
	else
		printk( "RTL: hard flags %08x %s\n", (unsigned int) flags,
			(flags&ARCH_DEFINED_ENABLE) ? "enabled" : "disabled" );
		
	printk( "RTL: global flags %lx %s%s\n", rtl_global.flags,
		((rtl_global.flags>>g_pend_since_sti)&1) ? "pend_since_sti " : "",
		((rtl_global.flags>>g_initializing)&1) ? "initializing " : "");
	printk( "RTL: global pending " );
	for ( i = 0; i < IRQ_ARRAY_SIZE; i++ )
		printk( "%lx ", rtl_global.pending[i] );
	printk("\n");
	printk( "RTL: global enabled " );
	for ( i = 0; i < IRQ_ARRAY_SIZE; i++ )
		printk( "%lx ", rtl_global.soft_enabled[i] );
	printk( "\n" );
	for ( i = 0 ; i < rtl_num_cpus(); i++ )
	{
		int cpu = cpu_logical_map (i);
		printk( "RTL: cpu%d "
#ifdef __LOCAL_IRQS__
			"local pending %x "
#endif			
			"flags: %x %s%s%s\n", cpu,
			rtl_local[cpu].flags,
#ifdef __LOCAL_IRQS__
			rtl_local[cpu].pending,
#endif			
			((rtl_local[cpu].flags>>l_ienable)&1)?"ienabled ":"disabled ",
			((rtl_local[cpu].flags>>l_pend_since_sti)&1)?"pend_since_sti ":"",
			((rtl_local[cpu].flags>>l_busy)&1)?"busy":"" );
	}
	printk( "RTL: last soft cli from: %08lx\n", xxx_last_cli );
	rtl_spin_unlock(&debug_lock);
}

void rtl_soft_irq_type(int unused, void *junk, struct pt_regs *garbage)
{
	printk("rtl_soft_irq_type(): shouldn't have been called!\n");
}

/* TODO VY: needs some synchronization here. Doesn't request_irq also
   have a problem? */
int rtl_get_soft_irq (void (*handler) (int, void *, struct pt_regs *),
                     const char *devname)
{
	int i;
	int debug = 0;
	for (i = RTL_NR_IRQS - 1; i > 15; i--)
	{
/* 		if (!irq_desc[i].action) */
		{
			if ((debug=request_irq (i, handler, 0, devname, 0)))
			{
/*				printk("RTL_GET_SOFT_IRQ %d: request=%d\n",
				       i, debug);
 				return -EBUSY; */
				continue;
			}
			else
			{
				rtl_virt_enable(i);
				return i;
			}
		}
	}
	printk("RTL_GET_SOFT_IRQ %d: request=%d\n", i, debug);
	return -1;
}


/* compatibility irq handler table */
#include <asm/rt_irq.h>
RTL_V1_HANDLER rtl_v1_irq[NR_IRQS];

extern unsigned int rtl_compat_irq_handler(unsigned int irq, struct pt_regs *regs)
{
	rtl_v1_irq[irq]();
	rtl_hard_enable_irq(irq);
	return 0;
}

